diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java index 9a525adeb9..5c4d482226 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePointLookupOptimizerRule.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -43,8 +44,12 @@ import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.hadoop.hbase.shaded.com.google.common.graph.MutableValueGraph; +import org.apache.hadoop.hbase.shaded.com.google.common.graph.ValueGraphBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule.RexTransformIntoInClause.RexNodeRef; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -174,9 +179,129 @@ public RexNode analyzeRexNode(RexBuilder rexBuilder, RexNode condition) { // 2. We merge IN expressions RexMergeInClause mergeInClause = new RexMergeInClause(rexBuilder); newCondition = mergeInClause.apply(newCondition); + + // 3. Close BETWEEN expressions if possible + RexTranformIntoBetween t = new RexTranformIntoBetween(rexBuilder); + newCondition = t.apply(newCondition); return newCondition; } + /** + * Transforms pairs of range comparisons into BETWEEN (or NOT BETWEEN) clauses, when possible. 
+ */ + protected static class RexTranformIntoBetween extends RexShuttle { + private final RexBuilder rexBuilder; + + RexTranformIntoBetween(RexBuilder rexBuilder) { + this.rexBuilder = rexBuilder; + } + + @Override + public RexNode visitCall(RexCall inputCall) { + RexNode node = super.visitCall(inputCall); + if (node instanceof RexCall) { + RexCall call = (RexCall) node; + switch (call.getKind()) { + case AND: + return processComparisions(call, SqlKind.LESS_THAN_OR_EQUAL, false); + case OR: + return processComparisions(call, SqlKind.GREATER_THAN, true); + default: + break; + } + } + return node; + } + + /** + * Represents a replacement candidate. + */ + static class BetweenCandidate { + + private final RexNode newNode; + private final RexNode[] oldNodes; + // keeps track if this candidate was already used during replacement + private boolean used; + + public BetweenCandidate(RexNode newNode, RexNode... oldNodes) { + this.newNode = newNode; + this.oldNodes = oldNodes; + } + } + + private RexNode processComparisions(RexCall call, SqlKind forwardEdge, boolean invert) { + MutableValueGraph g = + buildComparisionGraph(call.getOperands(), forwardEdge); + Map replacedNodes = new IdentityHashMap<>(); + for (RexNodeRef n : g.nodes()) { + Set pred = g.predecessors(n); + Set succ = g.successors(n); + if (pred.size() > 0 && succ.size() > 0) { + RexNodeRef p = pred.iterator().next(); + RexNodeRef s = succ.iterator().next(); + + RexNode between = rexBuilder.makeCall(HiveBetween.INSTANCE, + rexBuilder.makeLiteral(invert), n.node, p.node, s.node); + BetweenCandidate bc = new BetweenCandidate( + between, + g.removeEdge(p, n), + g.removeEdge(n, s)); + + for (RexNode node : bc.oldNodes) { + replacedNodes.put(node, bc); + } + } + } + if (replacedNodes.isEmpty()) { + // no effect + return call; + } + List newOperands = new ArrayList<>(); + for (RexNode o : call.getOperands()) { + BetweenCandidate candidate = replacedNodes.get(o); + if (candidate == null) { + newOperands.add(o); + } 
else { + if (!candidate.used) { + newOperands.add(candidate.newNode); + candidate.used = true; + } + } + } + + if (newOperands.size() == 1) { + return newOperands.get(0); + } else { + return rexBuilder.makeCall(call.getOperator(), newOperands); + } + } + + /** + * Builds a graph of the given comparison type. + * + * The graph edges are annotated with the RexNodes representing the comparison. + */ + private MutableValueGraph buildComparisionGraph(List operands, SqlKind cmpForward) { + MutableValueGraph g = ValueGraphBuilder.directed().build(); + for (RexNode node : operands) { + if(!(node instanceof RexCall) ) { + continue; + } + RexCall rexCall = (RexCall) node; + SqlKind kind = rexCall.getKind(); + if (kind == cmpForward) { + RexNode opA = rexCall.getOperands().get(0); + RexNode opB = rexCall.getOperands().get(1); + g.putEdgeValue(new RexNodeRef(opA), new RexNodeRef(opB), rexCall); + } else if (kind == cmpForward.reverse()) { + RexNode opA = rexCall.getOperands().get(1); + RexNode opB = rexCall.getOperands().get(0); + g.putEdgeValue(new RexNodeRef(opA), new RexNodeRef(opB), rexCall); + } + } + return g; + } + } /** * Transforms OR clauses into IN clauses, when possible. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index a920e4096e..d15c710c5e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -35,6 +35,7 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlBinaryOperator; import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlKind; @@ -368,6 +369,23 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { childRexNodeLst = adjustCaseBranchTypes(childRexNodeLst, retType); } else if (calciteOp == HiveToDateSqlOperator.INSTANCE) { childRexNodeLst = rewriteToDateChildren(childRexNodeLst); + } else if (calciteOp.getKind() == SqlKind.BETWEEN) { + assert childRexNodeLst.get(0).isAlwaysTrue() || childRexNodeLst.get(0).isAlwaysFalse(); + boolean invert = childRexNodeLst.get(0).isAlwaysTrue(); + SqlBinaryOperator cmpOp; + if (invert) { + calciteOp = SqlStdOperatorTable.OR; + cmpOp = SqlStdOperatorTable.GREATER_THAN; + } else { + calciteOp = SqlStdOperatorTable.AND; + cmpOp = SqlStdOperatorTable.LESS_THAN_OR_EQUAL; + } + RexNode op = childRexNodeLst.get(1); + RexNode rangeL = childRexNodeLst.get(2); + RexNode rangeH = childRexNodeLst.get(3); + childRexNodeLst.clear(); + childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, rangeL, op)); + childRexNodeLst.add(cluster.getRexBuilder().makeCall(cmpOp, op, rangeH)); } expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); } else { diff --git ql/src/test/queries/clientpositive/udf_between.q ql/src/test/queries/clientpositive/udf_between.q index 9ee33520b9..2544366d34 100644 --- 
ql/src/test/queries/clientpositive/udf_between.q +++ ql/src/test/queries/clientpositive/udf_between.q @@ -16,3 +16,32 @@ SELECT * FROM src where 'b' between 'a' AND 'c' LIMIT 1; explain SELECT * FROM src where 2 between 2 AND '3' LIMIT 1; SELECT * FROM src where 2 between 2 AND '3' LIMIT 1; + + +create table t(i int); +insert into t values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11); + +SELECT * FROM t where i between 8 and 9 + or i between 9 and 10; + +explain +SELECT * FROM t where i between 8 and 9 + or i between 9 and 10; + +SELECT * FROM t where i between 8 and 9 + or i between 9 and 10; + +explain +SELECT * FROM t where i between 6 and 7 + or i between 9 and 10; + +SELECT * FROM t where i between 6 and 7 + or i between 9 and 10; + +explain +SELECT * FROM t where i not between 6 and 7 + and i not between 9 and 10; + +SELECT * FROM t where i not between 6 and 7 + and i not between 9 and 10; + diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 9ad55cd91f..14784bfbce 100644 --- ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -34,22 +34,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) mode: hash 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -58,7 +58,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -71,38 +71,38 @@ STAGE PLANS: Map Operator Tree: TableScan Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: 
COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan alias: x - filterExpr: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -112,14 +112,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 
8418 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -130,22 +130,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -154,7 +154,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -237,72 +237,72 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num 
rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan alias: x - filterExpr: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col1 (type: string) Reduce Operator Tree: Demux Operator - Statistics: Num rows: 182 Data size: 18570 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 171 Data size: 18085 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Mux Operator - Statistics: Num rows: 348 Data size: 34340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 319 Data size: 32145 Basic stats: COMPLETE Column stats: COMPLETE Join Operator condition map: Inner Join 0 to 1 @@ -310,14 +310,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -327,11 +327,11 @@ STAGE PLANS: keys: KEY._col0 (type: 
string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Mux Operator - Statistics: Num rows: 348 Data size: 34340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 319 Data size: 32145 Basic stats: COMPLETE Column stats: COMPLETE Join Operator condition map: Inner Join 0 to 1 @@ -339,20 +339,20 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator - Statistics: Num rows: 348 Data size: 34340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 319 Data size: 32145 Basic stats: COMPLETE Column stats: COMPLETE Join Operator condition map: Inner Join 0 to 1 @@ -360,14 +360,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 
outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 382 Data size: 37774 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 350 Data size: 35359 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -890,22 +890,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data 
size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -914,7 +914,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -927,38 +927,38 @@ STAGE PLANS: Map Operator Tree: TableScan Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan alias: x - filterExpr: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE 
Column stats: COMPLETE Filter Operator - predicate: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -968,14 +968,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -986,22 +986,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1010,11 +1010,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 13764 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: diff --git ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out index 9c412d97dd..d2b3acc243 100644 --- ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out +++ ql/src/test/results/clientpositive/druid/druidmini_test_ts.q.out @@ -406,15 +406,18 @@ STAGE PLANS: Processor Tree: TableScan alias: druid_table_alltypesorc + filterExpr: (vc BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2011-01-01 00:00:00.0 US/Pacific' or vc BETWEEN TIMESTAMPLOCALTZ'2012-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2013-01-01 00:00:00.0 US/Pacific') (type: boolean) properties: druid.fieldNames vc druid.fieldTypes timestamp with local time zone - druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["2010-01-01T08:00:00.000Z/2011-01-01T08:00:00.001Z","2012-01-01T08:00:00.000Z/2013-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} + druid.query.json {"queryType":"scan","dataSource":"default.druid_table_alltypesorc","intervals":["1900-01-01T00:00:00.000Z/2013-01-01T08:00:00.001Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc"],"resultFormat":"compactedList"} druid.query.type scan - Select Operator - expressions: vc (type: timestamp with local time zone) - outputColumnNames: _col0 - ListSink + Filter Operator + predicate: (vc BETWEEN TIMESTAMPLOCALTZ'2010-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2011-01-01 00:00:00.0 US/Pacific' or vc BETWEEN TIMESTAMPLOCALTZ'2012-01-01 00:00:00.0 US/Pacific' AND TIMESTAMPLOCALTZ'2013-01-01 00:00:00.0 US/Pacific') (type: boolean) + 
Select Operator + expressions: vc (type: timestamp with local time zone) + outputColumnNames: _col0 + ListSink PREHOOK: query: EXPLAIN SELECT `__time` diff --git ql/src/test/results/clientpositive/druid_timeseries.q.out ql/src/test/results/clientpositive/druid_timeseries.q.out index fd69771e61..90f7860ed2 100644 --- ql/src/test/results/clientpositive/druid_timeseries.q.out +++ ql/src/test/results/clientpositive/druid_timeseries.q.out @@ -31,7 +31,7 @@ STAGE PLANS: properties: druid.fieldNames $f0 druid.fieldTypes bigint - druid.query.json {"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"and","fields":[{"type":"bound","dimension":"__time","lower":"2010-01-01T00:00:00.000Z","lowerStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"bound","dimension":"__time","upper":"2012-03-01T00:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},{"type":"bound","dimension":"added","upper":"0.0","upperStrict":false,"ordering":"numeric"}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} + druid.query.json 
{"queryType":"timeseries","dataSource":"wikipedia","descending":false,"granularity":"all","filter":{"type":"or","fields":[{"type":"bound","dimension":"__time","lower":"2010-01-01T00:00:00.000Z","lowerStrict":false,"upper":"2012-03-01T00:00:00.000Z","upperStrict":false,"ordering":"lexicographic","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"bound","dimension":"added","upper":"0.0","upperStrict":false,"ordering":"numeric"}]},"aggregations":[{"type":"count","name":"$f0"}],"intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"context":{"skipEmptyBuckets":false}} druid.query.type timeseries Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/filter_numeric.q.out ql/src/test/results/clientpositive/filter_numeric.q.out index 74ce762191..0bd100f945 100644 --- ql/src/test/results/clientpositive/filter_numeric.q.out +++ ql/src/test/results/clientpositive/filter_numeric.q.out @@ -1664,7 +1664,7 @@ STAGE PLANS: Processor Tree: TableScan alias: partint - filterExpr: (not hr BETWEEN 12 AND 14) (type: boolean) + filterExpr: hr NOT BETWEEN 12 AND 14 (type: boolean) Statistics: Num rows: 500 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), hr (type: int) diff --git ql/src/test/results/clientpositive/infer_join_preds.q.out ql/src/test/results/clientpositive/infer_join_preds.q.out index d73005c82d..1410f7a632 100644 --- ql/src/test/results/clientpositive/infer_join_preds.q.out +++ ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -667,10 +667,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: (4.0D BETWEEN UDFToDouble(key) AND UDFToDouble(value) and key is not null and value is not null) (type: boolean) + filterExpr: 4.0D BETWEEN UDFToDouble(key) AND UDFToDouble(value) (type: boolean) Statistics: Num rows: 500 
Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (4.0D BETWEEN UDFToDouble(key) AND UDFToDouble(value) and key is not null and value is not null) (type: boolean) + predicate: 4.0D BETWEEN UDFToDouble(key) AND UDFToDouble(value) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) diff --git ql/src/test/results/clientpositive/join34.q.out ql/src/test/results/clientpositive/join34.q.out index f5a280f186..19f21c8ae6 100644 --- ql/src/test/results/clientpositive/join34.q.out +++ ql/src/test/results/clientpositive/join34.q.out @@ -32,17 +32,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n1 -OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`value` AS `value1` -FROM (SELECT `key`, `value` -FROM `default`.`src` -WHERE `key` < 20 -UNION ALL -SELECT `key`, `value` -FROM `default`.`src` -WHERE `key` > 100) AS `t3` -INNER JOIN (SELECT `key`, `value` -FROM `default`.`src1` -WHERE (`key` < 20 OR `key` > 100) AND `key` IS NOT NULL) AS `t5` ON `t3`.`key` = `t5`.`key` STAGE DEPENDENCIES: Stage-7 is a root stage Stage-6 depends on stages: Stage-7 @@ -61,17 +50,17 @@ STAGE PLANS: $hdt$_1:x TableScan alias: x - filterExpr: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + 
Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -83,19 +72,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 332 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 296 Data size: 52688 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -104,17 +93,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 
(type: string), _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -145,7 +134,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value, val2 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') mode: hash @@ -171,19 +160,19 @@ STAGE PLANS: MultiFileSpray: false TableScan alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 
Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 26344 Basic stats: COMPLETE Column stats: COMPLETE Union - Statistics: Num rows: 332 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 296 Data size: 52688 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -192,17 +181,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -233,7 +222,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value, val2 - Statistics: Num rows: 32 Data size: 8512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 12236 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') mode: hash diff --git ql/src/test/results/clientpositive/join35.q.out ql/src/test/results/clientpositive/join35.q.out index 227782126c..1597278057 100644 
--- ql/src/test/results/clientpositive/join35.q.out +++ ql/src/test/results/clientpositive/join35.q.out @@ -32,19 +32,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n24 -OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`$f1` AS `cnt` -FROM (SELECT `key`, COUNT(*) AS `$f1` -FROM `default`.`src` -WHERE `key` < 20 -GROUP BY `key` -UNION ALL -SELECT `key`, COUNT(*) AS `$f1` -FROM `default`.`src` -WHERE `key` > 100 -GROUP BY `key`) AS `t3` -INNER JOIN (SELECT `key`, `value` -FROM `default`.`src1` -WHERE (`key` < 20 OR `key` > 100) AND `key` IS NOT NULL) AS `t5` ON `t3`.`key` = `t5`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1, Stage-5 @@ -60,25 +47,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -143,7 +130,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 @@ -173,17 +160,17 @@ STAGE PLANS: $hdt$_1:x TableScan alias: x - filterExpr: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 23 Data size: 4025 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string) @@ -196,7 +183,7 @@ STAGE PLANS: TableScan GatherStats: false Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -205,17 +192,17 @@ STAGE PLANS: 1 _col0 (type: string) 
outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 32 Data size: 5728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -246,7 +233,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: key, value, val2 - Statistics: Num rows: 32 Data size: 5728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') mode: hash @@ -273,7 +260,7 @@ STAGE PLANS: TableScan GatherStats: false Union - Statistics: Num rows: 166 Data size: 15770 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 148 Data size: 14060 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -282,17 +269,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 Position of Big Table: 0 - Statistics: Num rows: 32 Data size: 5856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8418 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: _col2 (type: string), _col3 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 32 Data size: 5728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 32 Data size: 5728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -323,7 +310,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: key, value, val2 - Statistics: Num rows: 32 Data size: 5728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46 Data size: 8234 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') mode: hash @@ -561,25 +548,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 12876 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string) mode: hash 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -644,7 +631,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 74 Data size: 7030 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 diff --git ql/src/test/results/clientpositive/join45.q.out ql/src/test/results/clientpositive/join45.q.out index 8a6fd8fcec..7002c8e024 100644 --- ql/src/test/results/clientpositive/join45.q.out +++ ql/src/test/results/clientpositive/join45.q.out @@ -506,7 +506,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 12500 Data size: 4612500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((_col2 + _col5) <= 102.0D) and ((_col2 + _col5) >= 100.0D)) (type: boolean) + predicate: (_col2 + _col5) BETWEEN 100.0D AND 102.0D (type: boolean) Statistics: Num rows: 1388 Data size: 512172 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) diff --git ql/src/test/results/clientpositive/join47.q.out ql/src/test/results/clientpositive/join47.q.out index 475079dacf..aa6194f77d 100644 --- ql/src/test/results/clientpositive/join47.q.out +++ 
ql/src/test/results/clientpositive/join47.q.out @@ -502,7 +502,7 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col2 + _col5) >= 100.0D)} {((_col2 + _col5) <= 102.0D)} + residual filter predicates: {(_col2 + _col5) BETWEEN 100.0D AND 102.0D} Statistics: Num rows: 1388 Data size: 512172 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) diff --git ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out index d98a9b665d..421e9fd748 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out @@ -1300,10 +1300,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((d_year >= 1992) and (c_region = 'ASIA') and (s_region = 'ASIA') and (d_year <= 1997)) (type: boolean) + filterExpr: ((c_region = 'ASIA') and (s_region = 'ASIA') and d_year BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((c_region = 'ASIA') and (d_year <= 1997) and (d_year >= 1992) and (s_region = 'ASIA')) (type: boolean) + predicate: ((c_region = 'ASIA') and (s_region = 'ASIA') and d_year BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c_nation (type: string), d_year (type: int), s_nation (type: string), lo_revenue (type: double) @@ -1425,10 +1425,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((d_year >= 1992) and (c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES') and (d_year <= 1997)) (type: boolean) + filterExpr: ((c_nation = 'UNITED STATES') 
and (s_nation = 'UNITED STATES') and d_year BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((c_nation = 'UNITED STATES') and (d_year <= 1997) and (d_year >= 1992) and (s_nation = 'UNITED STATES')) (type: boolean) + predicate: ((c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES') and d_year BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 348 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c_city (type: string), d_year (type: int), s_city (type: string), lo_revenue (type: double) @@ -1550,10 +1550,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (d_year >= 1992) and (d_year <= 1997)) (type: boolean) + filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and d_year BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (d_year <= 1997) and (d_year >= 1992) and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean) + predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and d_year BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(lo_revenue) diff --git ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out index abefa7eaa2..643c79fa76 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out @@ -1302,10 +1302,10 @@ STAGE PLANS: Map 
Operator Tree: TableScan alias: default.ssb_mv - filterExpr: ((UDFToInteger(d_year) >= 1992) and (c_region = 'ASIA') and (s_region = 'ASIA') and (UDFToInteger(d_year) <= 1997)) (type: boolean) + filterExpr: ((c_region = 'ASIA') and (s_region = 'ASIA') and UDFToInteger(d_year) BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992) and (c_region = 'ASIA') and (s_region = 'ASIA')) (type: boolean) + predicate: ((c_region = 'ASIA') and (s_region = 'ASIA') and UDFToInteger(d_year) BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c_nation (type: string), s_nation (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double) @@ -1427,10 +1427,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv - filterExpr: ((UDFToInteger(d_year) >= 1992) and (c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES') and (UDFToInteger(d_year) <= 1997)) (type: boolean) + filterExpr: ((c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES') and UDFToInteger(d_year) BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992) and (c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES')) (type: boolean) + predicate: ((c_nation = 'UNITED STATES') and (s_nation = 'UNITED STATES') and UDFToInteger(d_year) BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double) @@ -1552,10 +1552,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: 
default.ssb_mv - filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (UDFToInteger(d_year) >= 1992) and (UDFToInteger(d_year) <= 1997)) (type: boolean) + filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and UDFToInteger(d_year) BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992) and (c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean) + predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and UDFToInteger(d_year) BETWEEN 1992 AND 1997) (type: boolean) Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double) diff --git ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 2a684018df..c9985ceb31 100644 --- ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -812,8 +812,7 @@ POSTHOOK: Input: default@part_null_n0 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product 
+Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size between (select min(p_size) from part) and (select avg(p_size) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -831,10 +830,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 4 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -843,16 +841,16 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), 
_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: part @@ -886,42 +884,27 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 16406 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: UDFToDouble(_col5) BETWEEN UDFToDouble(_col9) AND _col10 (type: boolean) - Statistics: Num rows: 2 Data size: 1262 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: 
string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col10 <= _col9)} {(_col9 <= _col11)} + Statistics: Num rows: 2 Data size: 1286 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -929,11 +912,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) - Reducer 6 + Select Operator + expressions: UDFToDouble(_col0) (type: double) + outputColumnNames: _col0 + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -956,8 +943,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size between (select min(p_size) from part) and (select avg(p_size) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -1755,7 +1741,7 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size BETWEEN (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND (select max(p_size) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -1776,7 +1762,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ 
-1785,17 +1771,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part + filterExpr: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs Map 4 @@ -1846,41 +1836,40 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator 
condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col4 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + residual filter predicates: {(_col9 <= _col5)} + Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: boolean) + Statistics: Num rows: 9 Data size: 5690 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 28 Data size: 17955 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: CASE WHEN (_col10 is null) THEN (_col5 BETWEEN null AND _col12) ELSE (_col5 BETWEEN _col9 AND _col12) END (type: boolean) - Statistics: Num rows: 14 Data size: 8977 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 
(type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8977 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 8977 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 + residual filter predicates: {(_col5 <= _col11)} + Statistics: Num rows: 3 Data size: 1923 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 1923 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 1923 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1891,15 +1880,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator - key expressions: _col2 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col2 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: boolean) + value expressions: _col0 (type: int) Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1919,7 +1908,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size BETWEEN (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND (select max(p_size) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index a91a36b358..40f8e35f4e 100644 --- ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out @@ -192,7 +192,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 36 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col4 AND _col4) THEN ('Ok') ELSE ('NoOk') END (type: string) + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col4 BETWEEN _col1 AND _col1) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 36 Data size: 7192 Basic stats: COMPLETE Column stats: COMPLETE 
File Output Operator @@ -370,7 +370,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 36 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col1 BETWEEN _col4 AND _col4 (type: boolean) + predicate: _col4 BETWEEN _col1 AND _col1 (type: boolean) Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint) diff --git ql/src/test/results/clientpositive/llap/vector_interval_2.q.out ql/src/test/results/clientpositive/llap/vector_interval_2.q.out index 3b2b7ba971..b613e72db6 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_2.q.out @@ -1344,7 +1344,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_interval_2 - filterExpr: ((DATE'2002-03-01' = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = DATE'2002-03-01') and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= DATE'2002-03-01') and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= DATE'2002-03-01') and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' = (dt + INTERVAL'1-2')) and (DATE'2002-03-01' <= (dt + INTERVAL'1-2')) and (DATE'2002-03-01' >= (dt + INTERVAL'1-2')) and ((dt + INTERVAL'1-2') = DATE'2002-03-01') and ((dt + INTERVAL'1-2') <= DATE'2002-03-01') and ((dt + INTERVAL'1-2') >= DATE'2002-03-01') and (dt <> (dt + INTERVAL'1-2'))) (type: boolean) + filterExpr: ((DATE'2002-03-01' = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and DATE'2002-03-01' BETWEEN (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) AND (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = 
DATE'2002-03-01') and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' = (dt + INTERVAL'1-2')) and (dt + INTERVAL'1-2') BETWEEN DATE'2002-03-01' AND DATE'2002-03-01' and ((dt + INTERVAL'1-2') = DATE'2002-03-01') and (dt <> (dt + INTERVAL'1-2'))) (type: boolean) Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1352,8 +1352,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11747, col 8:date)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 7:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 7:interval_year_month) -> 8:date), FilterDateScalarLessEqualDateColumn(val 11747, col 10:date)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 9:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 9:interval_year_month) -> 10:date), FilterDateScalarGreaterEqualDateColumn(val 11747, col 12:date)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 11:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 11:interval_year_month) -> 12:date), FilterDateColEqualDateScalar(col 14:date, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 13:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 13:interval_year_month) -> 14:date), FilterDateColLessEqualDateScalar(col 16:date, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 15:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 15:interval_year_month) -> 16:date), FilterDateColGreaterEqualDateScalar(col 18:date, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 17:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 17:interval_year_month) -> 18:date), 
FilterLongColNotEqualLongColumn(col 1:date, col 20:date)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 19:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 19:interval_year_month) -> 20:date), FilterDateScalarEqualDateColumn(val 11747, col 21:date)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 21:date), FilterDateScalarLessEqualDateColumn(val 11747, col 22:date)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 22:date), FilterDateScalarGreaterEqualDateColumn(val 11747, col 23:date)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 23:date), FilterDateColEqualDateScalar(col 24:date, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 24:date), FilterDateColLessEqualDateScalar(col 25:date, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 25:date), FilterDateColGreaterEqualDateScalar(col 26:date, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 26:date), FilterLongColNotEqualLongColumn(col 1:date, col 27:date)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 27:date)) - predicate: (((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) <= DATE'2002-03-01') and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = DATE'2002-03-01') and ((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) >= DATE'2002-03-01') and ((dt + INTERVAL'1-2') <= DATE'2002-03-01') and ((dt + INTERVAL'1-2') = DATE'2002-03-01') and ((dt + INTERVAL'1-2') >= DATE'2002-03-01') and (DATE'2002-03-01' <= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' <= (dt + INTERVAL'1-2')) and (DATE'2002-03-01' = (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' = (dt + INTERVAL'1-2')) and (DATE'2002-03-01' >= (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' >= (dt + INTERVAL'1-2')) and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (dt <> (dt + INTERVAL'1-2'))) (type: 
boolean) + predicateExpression: FilterExprAndExpr(children: FilterDateScalarEqualDateColumn(val 11747, col 8:date)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 7:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 7:interval_year_month) -> 8:date), SelectColumnIsTrue(col 13:boolean)(children: VectorUDFAdaptor(DATE'2002-03-01' BETWEEN (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) AND (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)))(children: DateColAddIntervalYearMonthColumn(col 1:date, col 9:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 9:interval_year_month) -> 10:date, DateColAddIntervalYearMonthColumn(col 1:date, col 11:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 11:interval_year_month) -> 12:date) -> 13:boolean), FilterDateColEqualDateScalar(col 15:date, val 11747)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 14:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 14:interval_year_month) -> 15:date), FilterLongColNotEqualLongColumn(col 1:date, col 17:date)(children: DateColAddIntervalYearMonthColumn(col 1:date, col 16:interval_year_month)(children: CastStringToIntervalYearMonth(col 2:string) -> 16:interval_year_month) -> 17:date), FilterDateScalarEqualDateColumn(val 11747, col 18:date)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 18:date), FilterLongColumnBetween(col 19:date, left 11747, right 11747)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 19:date), FilterDateColEqualDateScalar(col 20:date, val 11747)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 20:date), FilterLongColNotEqualLongColumn(col 1:date, col 21:date)(children: DateColAddIntervalYearMonthScalar(col 1:date, val 1-2) -> 21:date)) + predicate: (((dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) = DATE'2002-03-01') and ((dt + INTERVAL'1-2') = DATE'2002-03-01') and (DATE'2002-03-01' = (dt + CAST( 
str1 AS INTERVAL YEAR TO MONTH))) and (DATE'2002-03-01' = (dt + INTERVAL'1-2')) and (dt + INTERVAL'1-2') BETWEEN DATE'2002-03-01' AND DATE'2002-03-01' and (dt <> (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) and (dt <> (dt + INTERVAL'1-2')) and DATE'2002-03-01' BETWEEN (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) AND (dt + CAST( str1 AS INTERVAL YEAR TO MONTH))) (type: boolean) Statistics: Num rows: 1 Data size: 183 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) @@ -1380,7 +1380,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -1533,7 +1533,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_interval_2 - filterExpr: ((TIMESTAMP'2002-03-01 01:02:03' = (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-03-01 01:02:03' <= (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-03-01 01:02:03' >= (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-04-01 01:02:03' <> (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-02-01 01:02:03' < (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-04-01 01:02:03' > (ts + INTERVAL'1-2')) and ((ts + INTERVAL'1-2') = TIMESTAMP'2002-03-01 01:02:03') and ((ts + INTERVAL'1-2') >= TIMESTAMP'2002-03-01 01:02:03') and ((ts + INTERVAL'1-2') <= TIMESTAMP'2002-03-01 01:02:03') and ((ts + INTERVAL'1-2') <> TIMESTAMP'2002-04-01 01:02:03') and ((ts + INTERVAL'1-2') > TIMESTAMP'2002-02-01 01:02:03') and ((ts + INTERVAL'1-2') < TIMESTAMP'2002-04-01 01:02:03') and (ts = (ts + INTERVAL'0-0')) and (ts <> (ts + INTERVAL'1-0')) and (ts <= (ts + INTERVAL'1-0')) and (ts < (ts + INTERVAL'1-0')) and (ts >= (ts - INTERVAL'1-0')) and (ts > (ts - INTERVAL'1-0'))) (type: boolean) + filterExpr: ((TIMESTAMP'2002-03-01 01:02:03' = (ts + INTERVAL'1-2')) and TIMESTAMP'2002-03-01 01:02:03' BETWEEN (ts + INTERVAL'1-2') AND (ts + INTERVAL'1-2') and 
(TIMESTAMP'2002-04-01 01:02:03' <> (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-02-01 01:02:03' < (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-04-01 01:02:03' > (ts + INTERVAL'1-2')) and ((ts + INTERVAL'1-2') = TIMESTAMP'2002-03-01 01:02:03') and ((ts + INTERVAL'1-2') <> TIMESTAMP'2002-04-01 01:02:03') and ((ts + INTERVAL'1-2') > TIMESTAMP'2002-02-01 01:02:03') and ((ts + INTERVAL'1-2') < TIMESTAMP'2002-04-01 01:02:03') and (ts = (ts + INTERVAL'0-0')) and (ts <> (ts + INTERVAL'1-0')) and ts BETWEEN (ts - INTERVAL'1-0') AND (ts + INTERVAL'1-0') and (ts < (ts + INTERVAL'1-0')) and (ts > (ts - INTERVAL'1-0'))) (type: boolean) Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1541,8 +1541,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2002-03-01 01:02:03, col 7:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 7:timestamp), FilterTimestampScalarLessEqualTimestampColumn(val 2002-03-01 01:02:03, col 8:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 8:timestamp), FilterTimestampScalarGreaterEqualTimestampColumn(val 2002-03-01 01:02:03, col 9:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 9:timestamp), FilterTimestampScalarNotEqualTimestampColumn(val 2002-04-01 01:02:03, col 10:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 10:timestamp), FilterTimestampScalarLessTimestampColumn(val 2002-02-01 01:02:03, col 11:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 11:timestamp), FilterTimestampScalarGreaterTimestampColumn(val 2002-04-01 01:02:03, col 12:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 12:timestamp), 
FilterTimestampColEqualTimestampScalar(col 13:timestamp, val 2002-03-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 13:timestamp), FilterTimestampColGreaterEqualTimestampScalar(col 14:timestamp, val 2002-03-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 14:timestamp), FilterTimestampColLessEqualTimestampScalar(col 15:timestamp, val 2002-03-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 15:timestamp), FilterTimestampColNotEqualTimestampScalar(col 16:timestamp, val 2002-04-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 16:timestamp), FilterTimestampColGreaterTimestampScalar(col 17:timestamp, val 2002-02-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 17:timestamp), FilterTimestampColLessTimestampScalar(col 18:timestamp, val 2002-04-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 18:timestamp), FilterTimestampColEqualTimestampColumn(col 0:timestamp, col 19:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 0-0) -> 19:timestamp), FilterTimestampColNotEqualTimestampColumn(col 0:timestamp, col 20:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 20:timestamp), FilterTimestampColLessEqualTimestampColumn(col 0:timestamp, col 21:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 21:timestamp), FilterTimestampColLessTimestampColumn(col 0:timestamp, col 22:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 22:timestamp), FilterTimestampColGreaterEqualTimestampColumn(col 0:timestamp, col 23:timestamp)(children: TimestampColSubtractIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 23:timestamp), FilterTimestampColGreaterTimestampColumn(col 0:timestamp, col 
24:timestamp)(children: TimestampColSubtractIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 24:timestamp)) - predicate: (((ts + INTERVAL'1-2') < TIMESTAMP'2002-04-01 01:02:03') and ((ts + INTERVAL'1-2') <= TIMESTAMP'2002-03-01 01:02:03') and ((ts + INTERVAL'1-2') <> TIMESTAMP'2002-04-01 01:02:03') and ((ts + INTERVAL'1-2') = TIMESTAMP'2002-03-01 01:02:03') and ((ts + INTERVAL'1-2') > TIMESTAMP'2002-02-01 01:02:03') and ((ts + INTERVAL'1-2') >= TIMESTAMP'2002-03-01 01:02:03') and (TIMESTAMP'2002-02-01 01:02:03' < (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-03-01 01:02:03' <= (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-03-01 01:02:03' = (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-03-01 01:02:03' >= (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-04-01 01:02:03' <> (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-04-01 01:02:03' > (ts + INTERVAL'1-2')) and (ts < (ts + INTERVAL'1-0')) and (ts <= (ts + INTERVAL'1-0')) and (ts <> (ts + INTERVAL'1-0')) and (ts = (ts + INTERVAL'0-0')) and (ts > (ts - INTERVAL'1-0')) and (ts >= (ts - INTERVAL'1-0'))) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2002-03-01 01:02:03, col 7:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 7:timestamp), SelectColumnIsTrue(col 10:boolean)(children: VectorUDFAdaptor(TIMESTAMP'2002-03-01 01:02:03' BETWEEN (ts + INTERVAL'1-2') AND (ts + INTERVAL'1-2'))(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 8:timestamp, TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 9:timestamp) -> 10:boolean), FilterTimestampScalarNotEqualTimestampColumn(val 2002-04-01 01:02:03, col 11:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 11:timestamp), FilterTimestampScalarLessTimestampColumn(val 2002-02-01 01:02:03, col 12:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 12:timestamp), 
FilterTimestampScalarGreaterTimestampColumn(val 2002-04-01 01:02:03, col 13:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 13:timestamp), FilterTimestampColEqualTimestampScalar(col 14:timestamp, val 2002-03-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 14:timestamp), FilterTimestampColNotEqualTimestampScalar(col 15:timestamp, val 2002-04-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 15:timestamp), FilterTimestampColGreaterTimestampScalar(col 16:timestamp, val 2002-02-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 16:timestamp), FilterTimestampColLessTimestampScalar(col 17:timestamp, val 2002-04-01 01:02:03)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-2) -> 17:timestamp), FilterTimestampColEqualTimestampColumn(col 0:timestamp, col 18:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 0-0) -> 18:timestamp), FilterTimestampColNotEqualTimestampColumn(col 0:timestamp, col 19:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 19:timestamp), SelectColumnIsTrue(col 22:boolean)(children: VectorUDFAdaptor(ts BETWEEN (ts - INTERVAL'1-0') AND (ts + INTERVAL'1-0'))(children: TimestampColSubtractIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 20:timestamp, TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 21:timestamp) -> 22:boolean), FilterTimestampColLessTimestampColumn(col 0:timestamp, col 23:timestamp)(children: TimestampColAddIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 23:timestamp), FilterTimestampColGreaterTimestampColumn(col 0:timestamp, col 24:timestamp)(children: TimestampColSubtractIntervalYearMonthScalar(col 0:timestamp, val 1-0) -> 24:timestamp)) + predicate: (((ts + INTERVAL'1-2') < TIMESTAMP'2002-04-01 01:02:03') and ((ts + INTERVAL'1-2') <> TIMESTAMP'2002-04-01 
01:02:03') and ((ts + INTERVAL'1-2') = TIMESTAMP'2002-03-01 01:02:03') and ((ts + INTERVAL'1-2') > TIMESTAMP'2002-02-01 01:02:03') and (TIMESTAMP'2002-02-01 01:02:03' < (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-03-01 01:02:03' = (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-04-01 01:02:03' <> (ts + INTERVAL'1-2')) and (TIMESTAMP'2002-04-01 01:02:03' > (ts + INTERVAL'1-2')) and (ts < (ts + INTERVAL'1-0')) and (ts <> (ts + INTERVAL'1-0')) and (ts = (ts + INTERVAL'0-0')) and (ts > (ts - INTERVAL'1-0')) and TIMESTAMP'2002-03-01 01:02:03' BETWEEN (ts + INTERVAL'1-2') AND (ts + INTERVAL'1-2') and ts BETWEEN (ts - INTERVAL'1-0') AND (ts + INTERVAL'1-0')) (type: boolean) Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) @@ -1569,7 +1569,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -1732,7 +1732,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_interval_2 - filterExpr: ((TIMESTAMP'2001-01-01 01:02:03' = (dt + INTERVAL'0 01:02:03.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (dt + INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <= (dt + INTERVAL'0 01:02:03.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' < (dt + INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' >= (dt - INTERVAL'0 01:02:03.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (dt - INTERVAL'0 01:02:04.000000000')) and ((dt + INTERVAL'0 01:02:03.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:03.000000000') >= TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((dt - INTERVAL'0 01:02:03.000000000') <= 
TIMESTAMP'2001-01-01 01:02:03') and ((dt - INTERVAL'0 01:02:04.000000000') < TIMESTAMP'2001-01-01 01:02:03') and (ts = (dt + INTERVAL'0 01:02:03.000000000')) and (ts <> (dt + INTERVAL'0 01:02:04.000000000')) and (ts <= (dt + INTERVAL'0 01:02:03.000000000')) and (ts < (dt + INTERVAL'0 01:02:04.000000000')) and (ts >= (dt - INTERVAL'0 01:02:03.000000000')) and (ts > (dt - INTERVAL'0 01:02:04.000000000'))) (type: boolean) + filterExpr: ((TIMESTAMP'2001-01-01 01:02:03' = (dt + INTERVAL'0 01:02:03.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (dt + INTERVAL'0 01:02:04.000000000')) and TIMESTAMP'2001-01-01 01:02:03' BETWEEN (dt - INTERVAL'0 01:02:03.000000000') AND (dt + INTERVAL'0 01:02:03.000000000') and (TIMESTAMP'2001-01-01 01:02:03' < (dt + INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (dt - INTERVAL'0 01:02:04.000000000')) and ((dt + INTERVAL'0 01:02:03.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((dt - INTERVAL'0 01:02:04.000000000') < TIMESTAMP'2001-01-01 01:02:03') and (ts = (dt + INTERVAL'0 01:02:03.000000000')) and (ts <> (dt + INTERVAL'0 01:02:04.000000000')) and ts BETWEEN (dt - INTERVAL'0 01:02:03.000000000') AND (dt + INTERVAL'0 01:02:03.000000000') and (ts < (dt + INTERVAL'0 01:02:04.000000000')) and (ts > (dt - INTERVAL'0 01:02:04.000000000'))) (type: boolean) Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1740,8 +1740,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03, col 7:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 7:timestamp), FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 
01:02:03, col 8:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 8:timestamp), FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03, col 9:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 9:timestamp), FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03, col 10:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 10:timestamp), FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03, col 11:timestamp)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 11:timestamp), FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03, col 12:timestamp)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 12:timestamp), FilterTimestampColEqualTimestampScalar(col 13:timestamp, val 2001-01-01 01:02:03)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 13:timestamp), FilterTimestampColNotEqualTimestampScalar(col 14:timestamp, val 2001-01-01 01:02:03)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 14:timestamp), FilterTimestampColGreaterEqualTimestampScalar(col 15:timestamp, val 2001-01-01 01:02:03)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 15:timestamp), FilterTimestampColGreaterTimestampScalar(col 16:timestamp, val 2001-01-01 01:02:03)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 16:timestamp), FilterTimestampColLessEqualTimestampScalar(col 17:timestamp, val 2001-01-01 01:02:03)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 17:timestamp), FilterTimestampColLessTimestampScalar(col 18:timestamp, val 2001-01-01 01:02:03)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 18:timestamp), 
FilterTimestampColEqualTimestampColumn(col 0:timestamp, col 19:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 19:timestamp), FilterTimestampColNotEqualTimestampColumn(col 0:timestamp, col 20:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 20:timestamp), FilterTimestampColLessEqualTimestampColumn(col 0:timestamp, col 21:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 21:timestamp), FilterTimestampColLessTimestampColumn(col 0:timestamp, col 22:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 22:timestamp), FilterTimestampColGreaterEqualTimestampColumn(col 0:timestamp, col 23:timestamp)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 23:timestamp), FilterTimestampColGreaterTimestampColumn(col 0:timestamp, col 24:timestamp)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 24:timestamp)) - predicate: (((dt + INTERVAL'0 01:02:03.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:03.000000000') >= TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((dt - INTERVAL'0 01:02:03.000000000') <= TIMESTAMP'2001-01-01 01:02:03') and ((dt - INTERVAL'0 01:02:04.000000000') < TIMESTAMP'2001-01-01 01:02:03') and (TIMESTAMP'2001-01-01 01:02:03' < (dt + INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <= (dt + INTERVAL'0 01:02:03.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (dt + INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' = (dt + INTERVAL'0 01:02:03.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (dt - INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' >= (dt - INTERVAL'0 01:02:03.000000000')) 
and (ts < (dt + INTERVAL'0 01:02:04.000000000')) and (ts <= (dt + INTERVAL'0 01:02:03.000000000')) and (ts <> (dt + INTERVAL'0 01:02:04.000000000')) and (ts = (dt + INTERVAL'0 01:02:03.000000000')) and (ts > (dt - INTERVAL'0 01:02:04.000000000')) and (ts >= (dt - INTERVAL'0 01:02:03.000000000'))) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03, col 7:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 7:timestamp), FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03, col 8:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 8:timestamp), SelectColumnIsTrue(col 11:boolean)(children: VectorUDFAdaptor(TIMESTAMP'2001-01-01 01:02:03' BETWEEN (dt - INTERVAL'0 01:02:03.000000000') AND (dt + INTERVAL'0 01:02:03.000000000'))(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 9:timestamp, DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 10:timestamp) -> 11:boolean), FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03, col 12:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 12:timestamp), FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03, col 13:timestamp)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 13:timestamp), FilterTimestampColEqualTimestampScalar(col 14:timestamp, val 2001-01-01 01:02:03)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 14:timestamp), FilterTimestampColNotEqualTimestampScalar(col 15:timestamp, val 2001-01-01 01:02:03)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 15:timestamp), FilterTimestampColGreaterTimestampScalar(col 16:timestamp, val 2001-01-01 01:02:03)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 
01:02:04.000000000) -> 16:timestamp), FilterTimestampColLessTimestampScalar(col 17:timestamp, val 2001-01-01 01:02:03)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 17:timestamp), FilterTimestampColEqualTimestampColumn(col 0:timestamp, col 18:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 18:timestamp), FilterTimestampColNotEqualTimestampColumn(col 0:timestamp, col 19:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 19:timestamp), SelectColumnIsTrue(col 22:boolean)(children: VectorUDFAdaptor(ts BETWEEN (dt - INTERVAL'0 01:02:03.000000000') AND (dt + INTERVAL'0 01:02:03.000000000'))(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 20:timestamp, DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:03.000000000) -> 21:timestamp) -> 22:boolean), FilterTimestampColLessTimestampColumn(col 0:timestamp, col 23:timestamp)(children: DateColAddIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 23:timestamp), FilterTimestampColGreaterTimestampColumn(col 0:timestamp, col 24:timestamp)(children: DateColSubtractIntervalDayTimeScalar(col 1:date, val 0 01:02:04.000000000) -> 24:timestamp)) + predicate: (((dt + INTERVAL'0 01:02:03.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((dt + INTERVAL'0 01:02:04.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((dt - INTERVAL'0 01:02:04.000000000') < TIMESTAMP'2001-01-01 01:02:03') and (TIMESTAMP'2001-01-01 01:02:03' < (dt + INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (dt + INTERVAL'0 01:02:04.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' = (dt + INTERVAL'0 01:02:03.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (dt - INTERVAL'0 01:02:04.000000000')) and (ts < (dt + INTERVAL'0 01:02:04.000000000')) and (ts <> (dt + INTERVAL'0 
01:02:04.000000000')) and (ts = (dt + INTERVAL'0 01:02:03.000000000')) and (ts > (dt - INTERVAL'0 01:02:04.000000000')) and TIMESTAMP'2001-01-01 01:02:03' BETWEEN (dt - INTERVAL'0 01:02:03.000000000') AND (dt + INTERVAL'0 01:02:03.000000000') and ts BETWEEN (dt - INTERVAL'0 01:02:03.000000000') AND (dt + INTERVAL'0 01:02:03.000000000')) (type: boolean) Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) @@ -1768,7 +1768,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -1931,7 +1931,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vector_interval_2 - filterExpr: ((TIMESTAMP'2001-01-01 01:02:03' = (ts + INTERVAL'0 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <= (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' < (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' >= (ts - INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (ts - INTERVAL'1 00:00:00.000000000')) and ((ts + INTERVAL'0 00:00:00.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') >= TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((ts - INTERVAL'1 00:00:00.000000000') <= TIMESTAMP'2001-01-01 01:02:03') and ((ts - INTERVAL'1 00:00:00.000000000') < TIMESTAMP'2001-01-01 01:02:03') and (ts = (ts + INTERVAL'0 00:00:00.000000000')) and (ts <> (ts + INTERVAL'1 00:00:00.000000000')) and (ts <= (ts + INTERVAL'1 00:00:00.000000000')) and (ts < (ts + INTERVAL'1 00:00:00.000000000')) and (ts >= 
(ts - INTERVAL'1 00:00:00.000000000')) and (ts > (ts - INTERVAL'1 00:00:00.000000000'))) (type: boolean) + filterExpr: ((TIMESTAMP'2001-01-01 01:02:03' = (ts + INTERVAL'0 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (ts + INTERVAL'1 00:00:00.000000000')) and TIMESTAMP'2001-01-01 01:02:03' BETWEEN (ts - INTERVAL'1 00:00:00.000000000') AND (ts + INTERVAL'1 00:00:00.000000000') and (TIMESTAMP'2001-01-01 01:02:03' < (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (ts - INTERVAL'1 00:00:00.000000000')) and ((ts + INTERVAL'0 00:00:00.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((ts - INTERVAL'1 00:00:00.000000000') < TIMESTAMP'2001-01-01 01:02:03') and (ts = (ts + INTERVAL'0 00:00:00.000000000')) and (ts <> (ts + INTERVAL'1 00:00:00.000000000')) and ts BETWEEN (ts - INTERVAL'1 00:00:00.000000000') AND (ts + INTERVAL'1 00:00:00.000000000') and (ts < (ts + INTERVAL'1 00:00:00.000000000')) and (ts > (ts - INTERVAL'1 00:00:00.000000000'))) (type: boolean) Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -1939,8 +1939,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03, col 7:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 0 00:00:00.000000000) -> 7:timestamp), FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03, col 8:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 8:timestamp), FilterTimestampScalarLessEqualTimestampColumn(val 2001-01-01 01:02:03, col 9:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 
00:00:00.000000000) -> 9:timestamp), FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03, col 10:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 10:timestamp), FilterTimestampScalarGreaterEqualTimestampColumn(val 2001-01-01 01:02:03, col 11:timestamp)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 11:timestamp), FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03, col 12:timestamp)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 12:timestamp), FilterTimestampColEqualTimestampScalar(col 13:timestamp, val 2001-01-01 01:02:03)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 0 00:00:00.000000000) -> 13:timestamp), FilterTimestampColNotEqualTimestampScalar(col 14:timestamp, val 2001-01-01 01:02:03)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 14:timestamp), FilterTimestampColGreaterEqualTimestampScalar(col 15:timestamp, val 2001-01-01 01:02:03)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 15:timestamp), FilterTimestampColGreaterTimestampScalar(col 16:timestamp, val 2001-01-01 01:02:03)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 16:timestamp), FilterTimestampColLessEqualTimestampScalar(col 17:timestamp, val 2001-01-01 01:02:03)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 17:timestamp), FilterTimestampColLessTimestampScalar(col 18:timestamp, val 2001-01-01 01:02:03)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 18:timestamp), FilterTimestampColEqualTimestampColumn(col 0:timestamp, col 19:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 0 00:00:00.000000000) -> 19:timestamp), 
FilterTimestampColNotEqualTimestampColumn(col 0:timestamp, col 20:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 20:timestamp), FilterTimestampColLessEqualTimestampColumn(col 0:timestamp, col 21:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 21:timestamp), FilterTimestampColLessTimestampColumn(col 0:timestamp, col 22:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 22:timestamp), FilterTimestampColGreaterEqualTimestampColumn(col 0:timestamp, col 23:timestamp)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 23:timestamp), FilterTimestampColGreaterTimestampColumn(col 0:timestamp, col 24:timestamp)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 24:timestamp)) - predicate: (((ts + INTERVAL'0 00:00:00.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') >= TIMESTAMP'2001-01-01 01:02:03') and ((ts - INTERVAL'1 00:00:00.000000000') < TIMESTAMP'2001-01-01 01:02:03') and ((ts - INTERVAL'1 00:00:00.000000000') <= TIMESTAMP'2001-01-01 01:02:03') and (TIMESTAMP'2001-01-01 01:02:03' < (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <= (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' = (ts + INTERVAL'0 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (ts - INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' >= (ts - INTERVAL'1 00:00:00.000000000')) and (ts < (ts + INTERVAL'1 00:00:00.000000000')) and (ts <= (ts + INTERVAL'1 00:00:00.000000000')) and (ts <> (ts + 
INTERVAL'1 00:00:00.000000000')) and (ts = (ts + INTERVAL'0 00:00:00.000000000')) and (ts > (ts - INTERVAL'1 00:00:00.000000000')) and (ts >= (ts - INTERVAL'1 00:00:00.000000000'))) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterTimestampScalarEqualTimestampColumn(val 2001-01-01 01:02:03, col 7:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 0 00:00:00.000000000) -> 7:timestamp), FilterTimestampScalarNotEqualTimestampColumn(val 2001-01-01 01:02:03, col 8:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 8:timestamp), SelectColumnIsTrue(col 11:boolean)(children: VectorUDFAdaptor(TIMESTAMP'2001-01-01 01:02:03' BETWEEN (ts - INTERVAL'1 00:00:00.000000000') AND (ts + INTERVAL'1 00:00:00.000000000'))(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 9:timestamp, TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 10:timestamp) -> 11:boolean), FilterTimestampScalarLessTimestampColumn(val 2001-01-01 01:02:03, col 12:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 12:timestamp), FilterTimestampScalarGreaterTimestampColumn(val 2001-01-01 01:02:03, col 13:timestamp)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 13:timestamp), FilterTimestampColEqualTimestampScalar(col 14:timestamp, val 2001-01-01 01:02:03)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 0 00:00:00.000000000) -> 14:timestamp), FilterTimestampColNotEqualTimestampScalar(col 15:timestamp, val 2001-01-01 01:02:03)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 15:timestamp), FilterTimestampColGreaterTimestampScalar(col 16:timestamp, val 2001-01-01 01:02:03)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 
16:timestamp), FilterTimestampColLessTimestampScalar(col 17:timestamp, val 2001-01-01 01:02:03)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 17:timestamp), FilterTimestampColEqualTimestampColumn(col 0:timestamp, col 18:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 0 00:00:00.000000000) -> 18:timestamp), FilterTimestampColNotEqualTimestampColumn(col 0:timestamp, col 19:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 19:timestamp), SelectColumnIsTrue(col 22:boolean)(children: VectorUDFAdaptor(ts BETWEEN (ts - INTERVAL'1 00:00:00.000000000') AND (ts + INTERVAL'1 00:00:00.000000000'))(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 20:timestamp, TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 21:timestamp) -> 22:boolean), FilterTimestampColLessTimestampColumn(col 0:timestamp, col 23:timestamp)(children: TimestampColAddIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 23:timestamp), FilterTimestampColGreaterTimestampColumn(col 0:timestamp, col 24:timestamp)(children: TimestampColSubtractIntervalDayTimeScalar(col 0:timestamp, val 1 00:00:00.000000000) -> 24:timestamp)) + predicate: (((ts + INTERVAL'0 00:00:00.000000000') = TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') <> TIMESTAMP'2001-01-01 01:02:03') and ((ts + INTERVAL'1 00:00:00.000000000') > TIMESTAMP'2001-01-01 01:02:03') and ((ts - INTERVAL'1 00:00:00.000000000') < TIMESTAMP'2001-01-01 01:02:03') and (TIMESTAMP'2001-01-01 01:02:03' < (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' <> (ts + INTERVAL'1 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' = (ts + INTERVAL'0 00:00:00.000000000')) and (TIMESTAMP'2001-01-01 01:02:03' > (ts - INTERVAL'1 00:00:00.000000000')) and (ts < (ts + INTERVAL'1 
00:00:00.000000000')) and (ts <> (ts + INTERVAL'1 00:00:00.000000000')) and (ts = (ts + INTERVAL'0 00:00:00.000000000')) and (ts > (ts - INTERVAL'1 00:00:00.000000000')) and TIMESTAMP'2001-01-01 01:02:03' BETWEEN (ts - INTERVAL'1 00:00:00.000000000') AND (ts + INTERVAL'1 00:00:00.000000000') and ts BETWEEN (ts - INTERVAL'1 00:00:00.000000000') AND (ts + INTERVAL'1 00:00:00.000000000')) (type: boolean) Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) @@ -1967,7 +1967,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: true - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/mapjoin47.q.out ql/src/test/results/clientpositive/mapjoin47.q.out index 67b2f4cd03..865a99b9a3 100644 --- ql/src/test/results/clientpositive/mapjoin47.q.out +++ ql/src/test/results/clientpositive/mapjoin47.q.out @@ -530,7 +530,7 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col2 + _col5) >= 100.0D)} {((_col2 + _col5) <= 102.0D)} + residual filter predicates: {(_col2 + _col5) BETWEEN 100.0D AND 102.0D} Statistics: Num rows: 1388 Data size: 512172 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) diff --git ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out index a1c5ab3071..4421f83e7d 100644 --- ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out +++ ql/src/test/results/clientpositive/partition_wise_fileformat2.q.out @@ -76,10 +76,10 @@ STAGE PLANS: Processor Tree: TableScan alias: partition_test_partitioned - filterExpr: ((UDFToDouble(dt) >= 100.0D) 
and (UDFToDouble(dt) <= 102.0D)) (type: boolean) + filterExpr: UDFToDouble(dt) BETWEEN 100.0D AND 102.0D (type: boolean) Statistics: Num rows: 75 Data size: 26925 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: ((UDFToDouble(dt) <= 102.0D) and (UDFToDouble(dt) >= 100.0D)) (type: boolean) + predicate: UDFToDouble(dt) BETWEEN 100.0D AND 102.0D (type: boolean) Statistics: Num rows: 8 Data size: 2872 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string), value (type: string), dt (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) @@ -200,10 +200,10 @@ STAGE PLANS: Processor Tree: TableScan alias: partition_test_partitioned - filterExpr: ((UDFToDouble(dt) >= 100.0D) and (UDFToDouble(dt) <= 102.0D)) (type: boolean) + filterExpr: UDFToDouble(dt) BETWEEN 100.0D AND 102.0D (type: boolean) Statistics: Num rows: 75 Data size: 26925 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: ((UDFToDouble(dt) <= 102.0D) and (UDFToDouble(dt) >= 100.0D)) (type: boolean) + predicate: UDFToDouble(dt) BETWEEN 100.0D AND 102.0D (type: boolean) Statistics: Num rows: 8 Data size: 2872 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string), value (type: string), dt (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index 660556ffe9..f8fe477e6b 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -3055,10 +3055,6 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 POSTHOOK: Input: default@pcr_t1@ds=2000-04-11 #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key`, `value`, `ds` -FROM `default`.`pcr_t1` -WHERE `ds` > '2000-04-08' AND `ds` < '2000-04-11' OR `ds` >= '2000-04-08' AND `ds` <= '2000-04-11' AND `key` = 2 -ORDER BY `key`, `value`, `ds` STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git ql/src/test/results/clientpositive/perf/spark/query13.q.out ql/src/test/results/clientpositive/perf/spark/query13.q.out index 2b6c19d6c2..b5df0bcd7b 100644 --- ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -122,7 +122,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 10 Map Operator Tree: TableScan alias: store @@ -137,12 +137,12 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col5 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 8 + Map 9 Map Operator Tree: TableScan alias: household_demographics @@ -157,7 +157,7 @@ STAGE PLANS: Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -166,72 +166,52 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 45), Map 6 (PARTITION-LEVEL SORT, 45) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 218), Reducer 2 (PARTITION-LEVEL SORT, 218) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 55), Reducer 3 (PARTITION-LEVEL SORT, 55) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 596), Reducer 2 (PARTITION-LEVEL SORT, 596) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 366), Reducer 3 (PARTITION-LEVEL SORT, 366) Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column 
stats: NONE - Filter Operator - predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: customer_demographics - filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (((ss_sales_price >= 100) or (ss_sales_price <= 150) or ss_sales_price is not null or (ss_sales_price <= 200)) and ((ss_net_profit >= 100) or (ss_net_profit <= 200) or ss_net_profit is not null or (ss_net_profit <= 300) or (ss_net_profit <= 250)) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + predicate: (((ss_net_profit >= 100) or (ss_net_profit <= 200) or ss_net_profit is not null or (ss_net_profit <= 300) or (ss_net_profit <= 250)) and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or ss_sales_price is 
not null or (ss_sales_price <= 200)) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int), (cd_marital_status = 'M') (type: boolean), (cd_education_status = '4 yr Degree') (type: boolean), (cd_marital_status = 'D') (type: boolean), (cd_education_status = 'Primary') (type: boolean), (cd_marital_status = 'U') (type: boolean), (cd_education_status = 'Advanced Degree') (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit BETWEEN 100 AND 200 (type: boolean), ss_net_profit BETWEEN 150 AND 300 (type: boolean), ss_net_profit BETWEEN 50 AND 250 (type: boolean), ss_sales_price BETWEEN 100 AND 150 (type: boolean), ss_sales_price BETWEEN 50 AND 100 (type: boolean), ss_sales_price BETWEEN 150 AND 200 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean) + Statistics: Num rows: 575995635 Data 
size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) Execution mode: vectorized Map 6 Map Operator Tree: TableScan - alias: store_sales - filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), 
ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit BETWEEN 100 AND 200 (type: boolean), ss_net_profit BETWEEN 150 AND 300 (type: boolean), ss_net_profit BETWEEN 50 AND 250 (type: boolean), ss_sales_price BETWEEN 100 AND 150 (type: boolean), ss_sales_price BETWEEN 50 AND 100 (type: boolean), ss_sales_price BETWEEN 150 AND 200 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 9 + Map 7 Map Operator Tree: TableScan alias: customer_address @@ -251,9 +231,27 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean) Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: customer_demographics + filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 
Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), (cd_marital_status = 'M') (type: boolean), (cd_education_status = '4 yr Degree') (type: boolean), (cd_marital_status = 'D') (type: boolean), (cd_education_status = 'Primary') (type: boolean), (cd_marital_status = 'U') (type: boolean), (cd_education_status = 'Advanced Degree') (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean) + Execution mode: vectorized Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -261,79 +259,77 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col5 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - input vertices: - 1 Map 7 - Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: 
NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col17, _col18 - input vertices: - 1 Map 8 - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col17 (type: boolean), _col18 (type: boolean) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col17, _col18, _col20, _col21, _col22 - Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE + 
outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16, _col17, _col18 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col20 and _col9) or (_col21 and _col10) or (_col22 and _col11)) (type: boolean) - Statistics: Num rows: 70276269 Data size: 6199792984 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col16 and _col8) or (_col17 and _col9) or (_col18 and _col10)) (type: boolean) + Statistics: Num rows: 522716061 Data size: 46114162643 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 70276269 Data size: 6199792984 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col12 (type: boolean), _col13 (type: boolean), _col14 (type: boolean), _col17 (type: boolean), _col18 (type: boolean) + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 522716061 Data size: 46114162643 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: boolean), _col12 (type: boolean), _col13 (type: boolean) Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col6, _col7, _col8, _col12, _col13, _col14, _col17, _col18, _col24, _col25, _col26, _col27, _col28, _col29 - Statistics: Num rows: 77303897 Data size: 6819772430 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col24 and _col25 and _col12 and _col17) or (_col26 and _col27 and _col13 and _col18) or (_col28 and 
_col29 and _col14 and _col18)) (type: boolean) - Statistics: Num rows: 14494479 Data size: 1278707181 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)) - outputColumnNames: _col6, _col7, _col8 - Statistics: Num rows: 14494479 Data size: 1278707181 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col6), count(_col6), sum(_col7), count(_col7), sum(_col8), count(_col8) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col11, _col12, _col13, _col20, _col21, _col22, _col23, _col24, _col25 + Statistics: Num rows: 574987679 Data size: 50725580006 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col5, _col6, _col7, _col11, _col12, _col13, _col20, _col21, _col22, _col23, _col24, _col25, _col27, _col28 + input vertices: + 1 Map 9 + Statistics: Num rows: 632486460 Data size: 55798139215 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col20 and _col21 and _col11 and _col27) or (_col22 and _col23 and _col12 and _col28) or (_col24 and _col25 and _col13 and _col28)) (type: boolean) + Statistics: Num rows: 118591209 Data size: 10462150904 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col7 + input vertices: + 1 Map 10 + Statistics: Num rows: 130450332 Data size: 11508366243 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col5), count(_col5), sum(_col6), count(_col6), sum(_col7), count(_col7) + mode: hash + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) Reducer 5 Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/perf/spark/query21.q.out ql/src/test/results/clientpositive/perf/spark/query21.q.out index 6c2efe75c9..091c3ee042 100644 --- ql/src/test/results/clientpositive/perf/spark/query21.q.out +++ ql/src/test/results/clientpositive/perf/spark/query21.q.out @@ -231,12 +231,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col2 > 0L)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.666667D AND 1.5D) ELSE (null) END (type: boolean) - Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE + predicate: (CASE WHEN ((_col2 > 0L)) THEN (((UDFToDouble(_col3) / UDFToDouble(_col2)) <= 1.5D)) ELSE (null) END and CASE WHEN ((_col2 > 0L)) THEN ((0.666667D <= (UDFToDouble(_col3) / UDFToDouble(_col2)))) ELSE (null) END) (type: boolean) + Statistics: Num rows: 6253038 Data size: 98796990 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ - Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6253038 Data size: 98796990 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory 
Usage: 0.1 value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 4 @@ -245,7 +245,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6253038 Data size: 98796990 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 1500 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/perf/spark/query34.q.out ql/src/test/results/clientpositive/perf/spark/query34.q.out index 1dd58b399f..517f35282a 100644 --- ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -153,10 +153,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_year) IN (2000, 2001, 2002) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and d_date_sk is not null) (type: boolean) + filterExpr: ((d_year) IN (2000, 2001, 2002) and ((d_dom >= 1) or (d_dom <= 3) or (d_dom >= 25) or (d_dom <= 28)) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean) + predicate: (((d_dom >= 1) or (d_dom <= 3) or (d_dom >= 25) or (d_dom <= 28)) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) diff --git 
ql/src/test/results/clientpositive/perf/spark/query48.q.out ql/src/test/results/clientpositive/perf/spark/query48.q.out index 024d205290..3c99f4f66d 100644 --- ql/src/test/results/clientpositive/perf/spark/query48.q.out +++ ql/src/test/results/clientpositive/perf/spark/query48.q.out @@ -150,7 +150,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 9 Map Operator Tree: TableScan alias: store @@ -165,7 +165,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -174,52 +174,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 46), Map 6 (PARTITION-LEVEL SORT, 46) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 49), Reducer 2 (PARTITION-LEVEL SORT, 49) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 218), Reducer 3 (PARTITION-LEVEL SORT, 218) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 147), Reducer 2 (PARTITION-LEVEL SORT, 147) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 319), Reducer 3 (PARTITION-LEVEL SORT, 319) Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: customer_demographics - filterExpr: ((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cd_demo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 465450 Data size: 
179296539 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 6 - Map Operator Tree: - TableScan alias: store_sales - filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) + filterExpr: (((ss_sales_price >= 100) or (ss_sales_price <= 150) or ss_sales_price is not null or (ss_sales_price <= 200)) and ((ss_net_profit >= 0) or (ss_net_profit <= 2000) or ss_net_profit is not null or (ss_net_profit <= 3000) or (ss_net_profit <= 25000)) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + predicate: (((ss_net_profit >= 0) or (ss_net_profit <= 2000) or ss_net_profit is not null or (ss_net_profit <= 3000) or (ss_net_profit <= 25000)) and ((ss_sales_price >= 
100) or (ss_sales_price <= 150) or ss_sales_price is not null or (ss_sales_price <= 200)) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_net_profit BETWEEN 0 AND 2000 (type: boolean), ss_net_profit BETWEEN 150 AND 3000 (type: boolean), ss_net_profit BETWEEN 50 AND 25000 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Execution mode: vectorized - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -238,7 +219,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column 
stats: NONE Execution mode: vectorized - Map 9 + Map 7 + Map Operator Tree: + TableScan + alias: customer_demographics + filterExpr: ((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -265,18 +265,16 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) + Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Reducer 3 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: @@ -284,43 +282,41 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7, _col8 - input vertices: - 1 Map 8 - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col6, _col7, _col8, _col12, _col13, _col14 - Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col3, _col4, _col5, _col6, 
_col7, _col11, _col12, _col13 + Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col12 and _col6) or (_col13 and _col7) or (_col14 and _col8)) (type: boolean) - Statistics: Num rows: 70276269 Data size: 6199792984 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int) - outputColumnNames: _col5 - Statistics: Num rows: 70276269 Data size: 6199792984 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col11 and _col5) or (_col12 and _col6) or (_col13 and _col7)) (type: boolean) + Statistics: Num rows: 191662557 Data size: 16908526469 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4 + input vertices: + 1 Map 9 + Statistics: Num rows: 210828817 Data size: 18599379519 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) + aggregations: sum(_col4) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/perf/spark/query54.q.out ql/src/test/results/clientpositive/perf/spark/query54.q.out index 690f1d4cdf..b3c95662b0 100644 --- ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -1,7 +1,4 @@ -Warning: Shuffle Join JOIN[111][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product -Warning: Shuffle Join JOIN[107][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Work 'Reducer 14' is a cross product -Warning: Shuffle Join JOIN[114][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Work 'Reducer 4' is a cross product -Warning: Map Join MAPJOIN[144][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[107][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -132,86 +129,15 @@ POSTHOOK: Input: default@web_sales #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 29 <- Map 28 (GROUP, 2) - Reducer 30 <- Reducer 29 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 28 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (d_month_seq + 1) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Reducer 29 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num 
rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 30 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-3 - Spark #### A masked pattern was here #### Vertices: - Map 18 + Map 11 Map Operator Tree: TableScan alias: store @@ -235,52 +161,28 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Reducer 9 (GROUP, 1) - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 15 (PARTITION-LEVEL SORT, 398) - Reducer 13 <- Reducer 12 (PARTITION-LEVEL SORT, 772), Reducer 17 (PARTITION-LEVEL SORT, 772) - Reducer 14 <- Reducer 13 (PARTITION-LEVEL SORT, 1), Reducer 32 (PARTITION-LEVEL SORT, 1) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 654), Reducer 23 (PARTITION-LEVEL SORT, 654) - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 20 <- Map 19 (PARTITION-LEVEL SORT, 458), Map 24 (PARTITION-LEVEL SORT, 458), Map 25 (PARTITION-LEVEL SORT, 458) - Reducer 21 <- Map 26 (PARTITION-LEVEL SORT, 505), Reducer 20 (PARTITION-LEVEL SORT, 505) - Reducer 22 <- Map 27 (PARTITION-LEVEL SORT, 1009), Reducer 21 (PARTITION-LEVEL SORT, 1009) - Reducer 23 <- Reducer 22 (GROUP, 610) - Reducer 3 <- Reducer 10 
(PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 32 <- Map 31 (GROUP, 2) - Reducer 4 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) + Reducer 10 <- Map 9 (PARTITION-LEVEL SORT, 654), Reducer 16 (PARTITION-LEVEL SORT, 654) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 458), Map 17 (PARTITION-LEVEL SORT, 458), Map 18 (PARTITION-LEVEL SORT, 458) + Reducer 14 <- Map 19 (PARTITION-LEVEL SORT, 505), Reducer 13 (PARTITION-LEVEL SORT, 505) + Reducer 15 <- Map 20 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009) + Reducer 16 <- Reducer 15 (GROUP, 610) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) + Reducer 22 <- Map 21 (GROUP, 2) + Reducer 23 <- Reducer 22 (GROUP, 1) + Reducer 25 <- Map 24 (GROUP, 2) + Reducer 26 <- Reducer 25 (GROUP, 1) + Reducer 28 <- Map 24 (GROUP, 2) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 772), Reducer 2 (PARTITION-LEVEL SORT, 772) + Reducer 30 <- Map 21 (GROUP, 2) + Reducer 4 <- Reducer 23 (PARTITION-LEVEL SORT, 1), Reducer 26 (PARTITION-LEVEL SORT, 1), Reducer 28 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1), Reducer 30 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 1009) Reducer 6 <- Reducer 5 (GROUP, 1009) Reducer 7 <- Reducer 6 (SORT, 1) - Reducer 9 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (d_month_seq + 3) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 
(type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 11 - Map Operator Tree: - TableScan alias: store_sales filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE @@ -298,58 +200,7 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Map 15 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: d_date_sk is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_month_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized - Map 16 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: (ca_address_sk is not null and ca_county is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter 
Operator - predicate: (ca_address_sk is not null and ca_county is not null and ca_state is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string), _col2 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 18 - Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 19 + Map 12 Map Operator Tree: TableScan alias: catalog_sales @@ -369,7 +220,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 24 + Map 17 Map Operator Tree: TableScan alias: web_sales @@ -389,7 +240,7 @@ STAGE PLANS: Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Map 25 + Map 18 Map Operator Tree: TableScan alias: date_dim @@ -408,7 +259,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 26 + Map 19 Map Operator Tree: TableScan alias: item @@ -427,7 +278,7 @@ STAGE PLANS: Map-reduce partition 
columns: _col0 (type: int) Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 27 + Map 20 Map Operator Tree: TableScan alias: customer @@ -447,7 +298,31 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Execution mode: vectorized - Map 31 + Map 21 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (d_month_seq + 3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 24 Map Operator Tree: TableScan alias: date_dim @@ -471,83 +346,58 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 10 + Map 8 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: d_date_sk is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic 
stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_month_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 9 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: (ca_address_sk is not null and ca_county is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ca_address_sk is not null and ca_county is not null and ca_state is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 11 + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 
8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 12 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) - Reducer 13 Local Work: Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col5 (type: int) - outputColumnNames: _col2, _col4, _col10 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col2, _col4, _col10 - input vertices: - 1 Reducer 30 - Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 696954748 Data size: 67758142923 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col2, _col4, _col10, _col13 - Statistics: Num rows: 6363893803988 Data size: 
7746260663496473 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col10 (type: int), _col2 (type: decimal(7,2)), _col4 (type: int), _col13 (type: int) - outputColumnNames: _col0, _col4, _col11, _col13 - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 6363893803988 Data size: 7746260663496473 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int) - Reducer 17 + Reducer 10 Reduce Operator Tree: Join Operator condition map: @@ -562,19 +412,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col5 (type: int) Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 20 + Reducer 13 Reduce Operator Tree: Join Operator condition map: @@ -590,7 +428,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Reducer 21 + Reducer 14 Reduce Operator Tree: Join Operator condition map: @@ -605,7 +443,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE - Reducer 22 + Reducer 15 Reduce Operator Tree: Join Operator condition map: @@ -625,7 +463,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 
(type: int), _col1 (type: int) Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE - Reducer 23 + Reducer 16 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -643,24 +481,119 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 3 + Reducer 2 Reduce Operator Tree: Join Operator condition map: - Right Outer Join 0 to 1 - filter predicates: - 0 - 1 {true} + Inner Join 0 to 1 keys: - 0 - 1 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: int) + Reducer 22 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 23 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 25 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 26 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 28 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 9131 Data 
size: 10217589 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 9131 Data size: 10299768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Reducer 32 + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col5 (type: int) + outputColumnNames: _col2, _col4, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int) + Reducer 30 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -677,34 +610,36 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + Inner Join 0 to 4 keys: 0 1 - outputColumnNames: _col0, _col2, _col6, _col13, _col15 + 2 + 3 + 4 + outputColumnNames: _col2, _col4, _col10, _col14, _col15 Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 58108714324214428 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 BETWEEN _col3 AND _col4 (type: boolean) + Filter Operator + predicate: ((_col14 <= _col4) and (_col4 <= _col15)) (type: boolean) + Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: decimal(7,2)), _col10 (type: int) + outputColumnNames: _col2, _col10 
Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: decimal(7,2)) + Group By Operator + aggregations: sum(_col2) + keys: _col10 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6456523813801603 Data size: 1024819115206086144 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)) + value expressions: _col1 (type: decimal(17,2)) Reducer 5 Execution mode: vectorized Reduce Operator Tree: @@ -766,25 +701,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/perf/spark/query58.q.out ql/src/test/results/clientpositive/perf/spark/query58.q.out index d3c7e0b84c..b616073fd0 100644 --- ql/src/test/results/clientpositive/perf/spark/query58.q.out +++ ql/src/test/results/clientpositive/perf/spark/query58.q.out @@ -926,7 +926,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (0.9 * _col1) (type: decimal(19,3)), (1.1 * _col1) (type: decimal(20,3)) + expressions: _col0 (type: string), _col1 (type: decimal(17,2)), (1.1 * _col1) (type: decimal(20,3)), (0.9 * _col1) (type: decimal(19,3)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -934,7 +934,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(20,3)), _col3 (type: decimal(19,3)) Reducer 40 Execution mode: vectorized Reduce Operator Tree: @@ -961,7 +961,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col9, _col10, _col11 Statistics: Num rows: 766650239 Data size: 67634106674 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 BETWEEN _col10 AND _col11 and _col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col2 AND _col3 and _col9 BETWEEN _col2 AND _col3 and _col9 BETWEEN _col6 AND _col7) (type: boolean) + predicate: (_col1 
BETWEEN _col10 AND _col11 and _col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col3 AND _col2 and _col9 BETWEEN _col3 AND _col7 and _col9 BETWEEN _col6 AND _col2) (type: boolean) Statistics: Num rows: 1442 Data size: 127213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col5 (type: decimal(17,2)), (((_col5 / ((_col5 + _col1) + _col9)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col5 + _col1) + _col9)) / 3) * 100) (type: decimal(38,17)), _col9 (type: decimal(17,2)), (((_col9 / ((_col5 + _col1) + _col9)) / 3) * 100) (type: decimal(38,17)), (((_col5 + _col1) + _col9) / 3) (type: decimal(23,6)) diff --git ql/src/test/results/clientpositive/perf/spark/query85.q.out ql/src/test/results/clientpositive/perf/spark/query85.q.out index a7bf288044..de1bf24eff 100644 --- ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -182,7 +182,8 @@ POSTHOOK: Input: default@web_sales #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -190,7 +191,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 15 Map Operator Tree: TableScan alias: web_page @@ -205,12 +206,17 @@ STAGE PLANS: Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col10 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Map 13 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 14 Map Operator Tree: TableScan alias: reason @@ -234,77 +240,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 
(PARTITION-LEVEL SORT, 28) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 25), Reducer 2 (PARTITION-LEVEL SORT, 25) - Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 21), Reducer 3 (PARTITION-LEVEL SORT, 21) - Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 186), Reducer 4 (PARTITION-LEVEL SORT, 186) - Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 29), Reducer 5 (PARTITION-LEVEL SORT, 29) - Reducer 7 <- Reducer 6 (GROUP, 10) - Reducer 8 <- Reducer 7 (SORT, 1) + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 198), Reducer 9 (PARTITION-LEVEL SORT, 198) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 6 (PARTITION-LEVEL SORT, 154) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 401), Reducer 2 (PARTITION-LEVEL SORT, 401) + Reducer 4 <- Reducer 3 (GROUP, 58) + Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 169), Map 7 (PARTITION-LEVEL SORT, 169) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 181), Reducer 8 (PARTITION-LEVEL SORT, 181) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: web_returns - filterExpr: (wr_item_sk is not null and wr_order_number is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_addr_sk is not null and wr_reason_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) (type: boolean) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: wr_item_sk (type: int), wr_refunded_cdemo_sk (type: int), wr_refunded_addr_sk (type: int), wr_returning_cdemo_sk (type: int), wr_reason_sk (type: int), wr_order_number (type: int), wr_fee (type: decimal(7,2)), wr_refunded_cash (type: 
decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col5 (type: int) - Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) - Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: cd2 - filterExpr: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: (((ws_sales_price >= 100) or (ws_sales_price <= 150) or ws_sales_price is not null or (ws_sales_price <= 200)) and ((ws_net_profit >= 100) or (ws_net_profit <= 200) or ws_net_profit is not null or (ws_net_profit <= 300) or (ws_net_profit <= 250)) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + predicate: (((ws_net_profit >= 100) or (ws_net_profit <= 200) or ws_net_profit is not null or (ws_net_profit <= 300) or (ws_net_profit <= 250)) and ((ws_sales_price >= 100) or (ws_sales_price <= 150) or ws_sales_price is not null or 
(ws_sales_price <= 200)) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_profit BETWEEN 100 AND 200 (type: boolean), ws_net_profit BETWEEN 150 AND 300 (type: boolean), ws_net_profit BETWEEN 50 AND 250 (type: boolean), ws_sales_price BETWEEN 100 AND 150 (type: boolean), ws_sales_price BETWEEN 50 AND 100 (type: boolean), ws_sales_price BETWEEN 150 AND 200 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) Execution mode: vectorized Map 11 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic 
stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 14 - Map Operator Tree: - TableScan alias: customer_address filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE @@ -322,7 +289,27 @@ STAGE PLANS: Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean) Execution mode: vectorized - Map 15 + Map 12 + Map Operator Tree: + TableScan + alias: cd2 + filterExpr: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: cd1 @@ -342,59 +329,46 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized - Map 9 + Map 6 Map Operator Tree: TableScan - alias: web_sales - filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE + predicate: 
((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_profit BETWEEN 100 AND 200 (type: boolean), ws_net_profit BETWEEN 150 AND 300 (type: boolean), ws_net_profit BETWEEN 50 AND 250 (type: boolean), ws_sales_price BETWEEN 100 AND 150 (type: boolean), ws_sales_price BETWEEN 50 AND 100 (type: boolean), ws_sales_price BETWEEN 150 AND 200 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14, _col15, _col16, _col17, _col18 - Statistics: Num 
rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: int), _col10 (type: int), _col12 (type: int), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col20, _col21 - Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col10 (type: int), _col12 (type: int), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col20 (type: string), _col21 (type: string) - Reducer 4 + Map 7 + Map Operator Tree: + TableScan + alias: web_returns + filterExpr: (wr_item_sk is not null and wr_order_number is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null and wr_refunded_addr_sk is not null and wr_reason_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: 
NONE + Filter Operator + predicate: (wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) (type: boolean) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wr_item_sk (type: int), wr_refunded_cdemo_sk (type: int), wr_refunded_addr_sk (type: int), wr_returning_cdemo_sk (type: int), wr_reason_sk (type: int), wr_order_number (type: int), wr_fee (type: decimal(7,2)), wr_refunded_cash (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) + Execution mode: vectorized + Reducer 10 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -402,85 +376,84 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col8 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6, _col7, _col10, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col20, _col21 - Statistics: Num rows: 21296393 Data size: 2895693863 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col13 (type: string), _col14 (type: string) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col18, _col19, _col20, _col21, _col22, _col23 + Statistics: Num rows: 26620000 Data size: 27016104217 Basic stats: COMPLETE Column stats: NONE 
Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col10 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4, _col6, _col7, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col20, _col21 + outputColumnNames: _col0, _col5, _col6, _col7, _col9, _col10, _col11, _col18, _col19, _col20, _col21, _col22, _col23, _col25 input vertices: - 1 Map 12 - Statistics: Num rows: 23426032 Data size: 3185263318 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col6, _col7, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col20, _col21, _col25 - input vertices: - 1 Map 13 - Statistics: Num rows: 25768635 Data size: 3503789725 Basic stats: COMPLETE Column stats: NONE + 1 Map 14 + Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col21 (type: boolean), _col22 (type: boolean), _col23 (type: boolean), _col0 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean), _col25 (type: string) + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col14, _col15, _col16, _col18, _col19, _col20, _col25 + Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 25768635 Data size: 3503789725 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: boolean), _col14 (type: boolean), _col15 (type: boolean), _col16 (type: boolean), _col17 
(type: boolean), _col18 (type: boolean), _col20 (type: string), _col21 (type: string), _col25 (type: string) - Reducer 5 + key expressions: _col9 (type: int), _col14 (type: int) + sort order: ++ + Map-reduce partition columns: _col9 (type: int), _col14 (type: int) + Statistics: Num rows: 29282000 Data size: 29717715282 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)), _col18 (type: boolean), _col19 (type: boolean), _col20 (type: boolean), _col25 (type: string) + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col6, _col7, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col20, _col21, _col25, _col27, _col28, _col29 - Statistics: Num rows: 28345499 Data size: 3854168781 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col27 and _col13) or (_col28 and _col14) or (_col29 and _col15)) (type: boolean) - Statistics: Num rows: 21259122 Data size: 2890626279 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col20 (type: string), _col21 (type: string) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col20 (type: string), _col21 (type: string) - Statistics: Num rows: 21259122 Data size: 2890626279 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col16 (type: boolean), _col17 (type: boolean), _col18 (type: boolean), _col25 (type: string) - Reducer 6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean), _col10 (type: boolean) + Reducer 3 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int), _col20 (type: string), _col21 (type: string) - 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col6, _col7, _col12, _col16, _col17, _col18, _col25, _col33, _col34, _col35, _col36, _col37, _col38 - Statistics: Num rows: 23385034 Data size: 3179688975 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: int), _col3 (type: int) + 1 _col9 (type: int), _col14 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col15, _col16, _col17, _col18, _col19, _col20, _col27, _col28, _col30, _col31, _col32, _col37 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col33 and _col34 and _col16) or (_col35 and _col36 and _col17) or (_col37 and _col38 and _col18)) (type: boolean) - Statistics: Num rows: 8769387 Data size: 1192383263 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col25 (type: string) - outputColumnNames: _col6, _col7, _col12, _col25 - Statistics: Num rows: 8769387 Data size: 1192383263 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col15 and _col16 and _col8) or (_col17 and _col18 and _col9) or (_col19 and _col20 and _col10)) and ((_col30 and _col5) or (_col31 and _col6) or (_col32 and _col7))) (type: boolean) 
+ Statistics: Num rows: 49005909 Data size: 6663386377 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col27, _col28, _col37 + input vertices: + 1 Map 15 + Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col25 (type: string) + aggregations: sum(_col4), count(_col4), sum(_col28), count(_col28), sum(_col27), count(_col27) + keys: _col37 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8769387 Data size: 1192383263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8769387 Data size: 1192383263 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53906501 Data size: 7329725173 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) - Reducer 7 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -488,23 +461,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 4384693 Data size: 596191563 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26953250 Data size: 3664862518 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (UDFToDouble(_col1) / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), 
substr(_col0, 1, 20) (type: string) outputColumnNames: _col4, _col5, _col6, _col7 - Statistics: Num rows: 4384693 Data size: 596191563 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26953250 Data size: 3664862518 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) sort order: ++++ - Statistics: Num rows: 4384693 Data size: 596191563 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26953250 Data size: 3664862518 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 8 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4384693 Data size: 596191563 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26953250 Data size: 3664862518 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE @@ -515,6 +488,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 22000000 Data size: 22327357890 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col13, _col14 + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col13 (type: string), _col14 (type: string) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col13 (type: string), _col14 (type: string) + Statistics: Num rows: 24200000 Data size: 24560094211 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col9 (type: boolean), _col10 (type: boolean), _col11 (type: boolean) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out index 19f3039781..412cc67334 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out @@ -114,28 +114,28 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) - HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)], agg#2=[sum($22)], agg#3=[count($22)], agg#4=[sum($23)], agg#5=[count($23)]) - HiveJoin(condition=[AND(=($0, $17), OR(AND($1, $2, $27, $12), AND($3, $4, $28, $13), AND($5, $6, $29, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) - 
HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[AND(=($12, $0), OR(AND($1, $17), AND($2, $18), AND($3, $19)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) - HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{}], agg#0=[sum($5)], agg#1=[count($5)], agg#2=[sum($6)], agg#3=[count($6)], agg#4=[sum($7)], agg#5=[count($7)]) + HiveJoin(condition=[=($29, $4)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveJoin(condition=[AND(=($2, $26), OR(AND($20, $21, $11, $27), AND($22, $23, $12, $28), AND($24, $25, $13, $28)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($19, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $15), OR(AND($16, $8), AND($17, $9), AND($18, $10)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100, 200)], BETWEEN9=[BETWEEN(false, $22, 150, 300)], BETWEEN10=[BETWEEN(false, $22, 50, 250)], BETWEEN11=[BETWEEN(false, $13, 100, 150)], BETWEEN12=[BETWEEN(false, $13, 50, 100)], BETWEEN13=[BETWEEN(false, $13, 150, 200)]) + HiveFilter(condition=[AND(OR(<=(100, $13), <=($13, 150), IS NOT NULL($13), <=($13, 200)), OR(<=(100, $22), <=($22, 200), IS NOT NULL($22), <=($22, 300), <=($22, 250)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100, 200)], BETWEEN9=[BETWEEN(false, $22, 150, 300)], BETWEEN10=[BETWEEN(false, $22, 50, 250)], BETWEEN11=[BETWEEN(false, $13, 100, 150)], BETWEEN12=[BETWEEN(false, $13, 50, 100)], BETWEEN13=[BETWEEN(false, $13, 150, 200)]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), 
OR(BETWEEN(false, $22, 100, 200), BETWEEN(false, $22, 150, 300), BETWEEN(false, $22, 50, 250)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) + HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out index 3a675034ef..0365697005 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query21.q.out @@ -69,7 +69,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: 
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) - HiveFilter(condition=[CASE(>($2, 0), BETWEEN(false, /(CAST($3):DOUBLE, CAST($2):DOUBLE), 6.66667E-1, 1.5E0), null)]) + HiveFilter(condition=[AND(CASE(>($2, 0), <=(6.66667E-1, /(CAST($3):DOUBLE, CAST($2):DOUBLE)), null), CASE(>($2, 0), <=(/(CAST($3):DOUBLE, CAST($2):DOUBLE), 1.5E0), null))]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) HiveProject($f0=[$1], $f1=[$10], $f2=[CASE($7, $5, 0)], $f3=[CASE($8, $5, 0)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out index 9299409a89..b2a6320acb 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query34.q.out @@ -88,7 +88,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(<=(1, $9), <=($9, 3), <=(25, $9), <=($9, 28)), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), >($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), null), IS NOT NULL($0))]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out index 079556ec99..22b051d06a 
100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query48.q.out @@ -141,24 +141,24 @@ POSTHOOK: Input: default@store POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveAggregate(group=[{}], agg#0=[sum($11)]) - HiveJoin(condition=[AND(=($9, $0), OR(AND($1, $12), AND($2, $13), AND($3, $14)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) +HiveAggregate(group=[{}], agg#0=[sum($10)]) + HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $10), AND($2, $11), AND($3, $12)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0, 2000)], BETWEEN6=[BETWEEN(false, $22, 150, 3000)], BETWEEN7=[BETWEEN(false, $22, 50, 25000)]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 0, 2000), BETWEEN(false, $22, 150, 3000), BETWEEN(false, $22, 50, 25000)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($6), IS NOT NULL($0))]) + HiveFilter(condition=[AND(OR(<=(100, $13), <=($13, 150), IS NOT NULL($13), <=($13, 200)), OR(<=(0, $22), <=($22, 2000), IS NOT NULL($22), <=($22, 3000), <=($22, 25000)), OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($6), IS NOT NULL($0))]) 
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out index 50fa078159..39b6a6be15 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain cbo with my_customers as ( select distinct c_customer_sk @@ -135,72 +135,50 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(segment=[$0], num_customers=[$1], segment_base=[*($0, 50)]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject(segment=[CAST(/($1, CAST(50):DECIMAL(10, 0))):INTEGER]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - 
HiveFilter(condition=[BETWEEN(false, $2, $3, $4)]) - HiveProject(c_customer_sk=[$0], ss_ext_sales_price=[$4], d_month_seq=[$11], _o__c0=[$13], $f0=[$14]) + HiveAggregate(group=[{10}], agg#0=[sum($2)]) + HiveJoin(condition=[<=($4, $15)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[<=($14, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$10], $f1=[$11], ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_ext_sales_price=[$2], ca_address_sk=[$5], ca_county=[$6], ca_state=[$7], s_county=[$8], s_state=[$9], d_date_sk=[$3], d_month_seq=[$4], cnt=[$12], $f00=[$13]) - HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($10, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_month_seq=[$3]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($8))]) - HiveTableScan(table=[[default, customer_address]], 
table:alias=[customer_address]) - HiveProject(s_county=[$23], s_state=[$24]) - HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) - HiveAggregate(group=[{0, 1}]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($10, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) + HiveAggregate(group=[{0, 1}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + 
HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($8, 3), =($6, 1999), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[+($3, 1)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], 
cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($15), IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 3), =($6, 1999), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[+($3, 1)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[right], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[+($3, 3)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(cnt=[$0]) HiveFilter(condition=[<=(sq_count_check($0), 1)]) HiveProject(cnt=[$0]) @@ -210,4 +188,23 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[+($3, 3)]) HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + 
HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 3)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out index b4410ff07a..a6f331fd36 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out @@ -142,9 +142,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($5, $1), $9), CAST(3):DECIMAL(10, 0))]) - HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), BETWEEN(false, $5, $10, $11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $7)), BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $2, $3)), BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)]) + HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), BETWEEN(false, $5, $10, $11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $2)), BETWEEN(false, $9, $3, $7))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $3, $2)), BETWEEN(false, $1, $6, $7))], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$0], $f1=[$1], *=[*(1.1, $1)], *3=[*(0.9, $1)]) HiveAggregate(group=[{4}], agg#0=[sum($2)]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out index 5a5bb797a1..356778b4a7 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query64.q.out @@ -268,62 +268,58 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveProject(product_name=[$0], store_name=[$2], store_zip=[$3], b_street_number=[$4], b_streen_name=[$5], b_city=[$6], b_zip=[$7], c_street_number=[$8], c_street_name=[$9], c_city=[$10], c_zip=[$11], cnt=[$12], s1=[$13], s2=[$14], s3=[$15], s11=[$20], s21=[$21], s31=[$22], cnt1=[$19]) HiveJoin(condition=[AND(AND(AND(=($1, $16), <=($19, $12)), =($2, $17)), =($3, $18))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject($f0=[$13], $f1=[$12], $f2=[$10], $f3=[$11], $f4=[$6], $f5=[$7], $f6=[$8], $f7=[$9], $f8=[$2], $f9=[$3], $f10=[$4], $f11=[$5], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) - HiveAggregate(group=[{9, 11, 16, 17, 18, 19, 25, 26, 27, 28, 30, 31, 48, 49}], agg#0=[count()], agg#1=[sum($45)], agg#2=[sum($46)], agg#3=[sum($47)]) - HiveJoin(condition=[AND(<>($1, $21), =($39, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($36, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) - 
HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveAggregate(group=[{9, 11, 16, 17, 18, 19, 25, 26, 27, 28, 30, 31, 47, 48}], agg#0=[count()], agg#1=[sum($44)], agg#2=[sum($45)], agg#3=[sum($46)]) + HiveJoin(condition=[=($33, $51)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(<>($1, $21), =($38, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($35, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + 
HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(d_date_sk=[$0], d_year=[$6]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(ib_income_band_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], ib_income_band_sk=[$12], p_promo_sk=[$13], ss_sold_date_sk=[$14], ss_item_sk=[$15], ss_customer_sk=[$16], ss_cdemo_sk=[$17], ss_hdemo_sk=[$18], ss_addr_sk=[$19], ss_store_sk=[$20], ss_promo_sk=[$21], 
ss_ticket_number=[$22], ss_wholesale_cost=[$23], ss_list_price=[$24], ss_coupon_amt=[$25], i_item_sk=[$26], i_product_name=[$27], d_date_sk=[$28], $f0=[$29]) - HiveJoin(condition=[AND(=($15, $0), =($22, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($17, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) HiveProject(ib_income_band_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) - HiveProject(p_promo_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], 
ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_product_name=[$14], d_date_sk=[$15], $f0=[$16]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], p_promo_sk=[$12], ss_sold_date_sk=[$13], ss_item_sk=[$14], ss_customer_sk=[$15], ss_cdemo_sk=[$16], ss_hdemo_sk=[$17], ss_addr_sk=[$18], ss_store_sk=[$19], ss_promo_sk=[$20], ss_ticket_number=[$21], ss_wholesale_cost=[$22], ss_list_price=[$23], ss_coupon_amt=[$24], i_item_sk=[$25], i_product_name=[$26], d_date_sk=[$27], $f0=[$28]) + HiveJoin(condition=[AND(=($14, $0), =($21, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($16, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT 
NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(p_promo_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) @@ -335,7 +331,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50), IS NOT NULL($0))]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 36, 45), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), IS NOT NULL($0))]) @@ -350,63 +346,62 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject($f1=[$12], $f2=[$10], $f3=[$11], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) - HiveAggregate(group=[{9, 11, 16, 17, 18, 
19, 25, 26, 27, 28, 30, 31, 48, 49}], agg#0=[count()], agg#1=[sum($45)], agg#2=[sum($46)], agg#3=[sum($47)]) - HiveJoin(condition=[AND(<>($1, $21), =($39, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveProject(ib_income_band_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($36, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) + HiveProject($f1=[$12], $f2=[$10], $f3=[$11], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) + HiveAggregate(group=[{9, 11, 16, 17, 18, 19, 25, 26, 27, 28, 30, 31, 47, 48}], agg#0=[count()], agg#1=[sum($44)], agg#2=[sum($45)], agg#3=[sum($46)]) + HiveJoin(condition=[=($33, $51)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(<>($1, $21), =($38, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + 
HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($35, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $18)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) HiveProject(d_date_sk=[$0], d_year=[$6]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(ib_income_band_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) - HiveProject(ca_address_sk=[$0], 
ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], ib_income_band_sk=[$12], p_promo_sk=[$13], ss_sold_date_sk=[$14], ss_item_sk=[$15], ss_customer_sk=[$16], ss_cdemo_sk=[$17], ss_hdemo_sk=[$18], ss_addr_sk=[$19], ss_store_sk=[$20], ss_promo_sk=[$21], ss_ticket_number=[$22], ss_wholesale_cost=[$23], ss_list_price=[$24], ss_coupon_amt=[$25], i_item_sk=[$26], i_product_name=[$27], d_date_sk=[$28], $f0=[$29]) - HiveJoin(condition=[AND(=($15, $0), =($22, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($17, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($13, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, 
date_dim]], table:alias=[d3]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) HiveProject(ib_income_band_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) - HiveProject(p_promo_sk=[$0], ss_sold_date_sk=[$1], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_promo_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_product_name=[$14], d_date_sk=[$15], $f0=[$16]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib2]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], hd_income_band_sk=[$11], p_promo_sk=[$12], ss_sold_date_sk=[$13], ss_item_sk=[$14], ss_customer_sk=[$15], ss_cdemo_sk=[$16], ss_hdemo_sk=[$17], ss_addr_sk=[$18], ss_store_sk=[$19], ss_promo_sk=[$20], ss_ticket_number=[$21], ss_wholesale_cost=[$22], ss_list_price=[$23], ss_coupon_amt=[$24], i_item_sk=[$25], i_product_name=[$26], d_date_sk=[$27], $f0=[$28]) + 
HiveJoin(condition=[AND(=($14, $0), =($21, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($16, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) HiveJoin(condition=[=($8, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(p_promo_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) @@ -418,7 +413,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9), IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50), IS NOT NULL($0))]) 
+ HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 36, 45), IS NOT NULL($0))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) @@ -433,4 +428,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($16))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, income_band]], table:alias=[ib1]) diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out index f5a71b422b..0963936768 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -184,36 +184,37 @@ CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) - HiveAggregate(group=[{14}], agg#0=[sum($32)], agg#1=[count($32)], agg#2=[sum($27)], agg#3=[count($27)], agg#4=[sum($26)], agg#5=[count($26)]) - HiveJoin(condition=[AND(AND(AND(=($1, $18), =($2, $19)), =($0, $21)), OR(AND($3, $4, $36), AND($5, $6, $37), AND($7, $8, 
$38)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[AND(=($0, $13), OR(AND($1, $24), AND($2, $25), AND($3, $26)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveAggregate(group=[{37}], agg#0=[sum($4)], agg#1=[count($4)], agg#2=[sum($28)], agg#3=[count($28)], agg#4=[sum($27)], agg#5=[count($27)]) + HiveJoin(condition=[=($2, $38)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(AND(=($1, $21), =($3, $26)), OR(AND($15, $16, $8), AND($17, $18, $9), AND($19, $20, $10))), OR(AND($30, $5), AND($31, $6), AND($32, $7)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, reason]], table:alias=[reason]) - HiveJoin(condition=[=($15, $0)], 
joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wp_web_page_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) - HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveJoin(condition=[AND(=($9, $0), =($11, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_page_sk=[$12], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100, 200)], BETWEEN6=[BETWEEN(false, $33, 150, 300)], BETWEEN7=[BETWEEN(false, $33, 50, 250)], BETWEEN8=[BETWEEN(false, $21, 100, 150)], BETWEEN9=[BETWEEN(false, $21, 50, 100)], BETWEEN10=[BETWEEN(false, $21, 150, 200)]) + HiveFilter(condition=[AND(OR(<=(100, $21), <=($21, 150), IS NOT NULL($21), <=($21, 200)), OR(<=(100, $33), <=($33, 200), IS NOT NULL($33), <=($33, 300), <=($33, 250)), IS NOT NULL($3), IS NOT NULL($17), IS NOT NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$1], cd_education_status=[$2], ==[$3], =4=[$4], =5=[$5], =6=[$6], =7=[$7], =8=[$8], wr_item_sk=[$9], 
wr_refunded_cdemo_sk=[$10], wr_refunded_addr_sk=[$11], wr_returning_cdemo_sk=[$12], wr_reason_sk=[$13], wr_order_number=[$14], wr_fee=[$15], wr_refunded_cash=[$16], ca_address_sk=[$17], IN=[$18], IN2=[$19], IN3=[$20], cd_demo_sk0=[$21], cd_marital_status0=[$22], cd_education_status0=[$23], r_reason_sk=[$24], r_reason_desc=[$25]) + HiveJoin(condition=[=($24, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($1, $22), =($2, $23)), =($0, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($13), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($12))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_web_page_sk=[$12], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100, 200)], BETWEEN6=[BETWEEN(false, $33, 150, 300)], BETWEEN7=[BETWEEN(false, $33, 50, 250)], BETWEEN8=[BETWEEN(false, $21, 100, 150)], BETWEEN9=[BETWEEN(false, $21, 50, 100)], 
BETWEEN10=[BETWEEN(false, $21, 150, 200)]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $21, 100, 150), BETWEEN(false, $21, 50, 100), BETWEEN(false, $21, 150, 200)), OR(BETWEEN(false, $33, 100, 200), BETWEEN(false, $33, 150, 300), BETWEEN(false, $33, 50, 250)), IS NOT NULL($3), IS NOT NULL($17), IS NOT NULL($12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, reason]], table:alias=[reason]) + HiveProject(wp_web_page_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_page]], table:alias=[web_page]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out index 811a5184aa..a08c5a4be1 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query13.q.out @@ -114,24 +114,24 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: 
hdfs://### HDFS PATH ### CBO PLAN: HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) - HiveAggregate(group=[{}], agg#0=[sum($19)], agg#1=[count($19)], agg#2=[sum($20)], agg#3=[count($20)], agg#4=[sum($21)], agg#5=[count($21)]) - HiveJoin(condition=[AND(=($0, $16), OR(AND($1, $2, $25, $12), AND($3, $4, $26, $13), AND($5, $6, $27, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[AND(=($11, $0), OR(AND($1, $15), AND($2, $16), AND($3, $17)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) - HiveFilter(condition=[IN($3, 3, 1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{}], agg#0=[sum($11)], 
agg#1=[count($11)], agg#2=[sum($12)], agg#3=[count($12)], agg#4=[sum($13)], agg#5=[count($13)]) + HiveJoin(condition=[AND(=($9, $25), OR(AND($1, $2, $17, $26), AND($3, $4, $18, $27), AND($5, $6, $19, $27)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[AND(=($3, $14), OR(AND($15, $7), AND($16, $8), AND($17, $9)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100, 200)], BETWEEN9=[BETWEEN(false, $22, 150, 300)], BETWEEN10=[BETWEEN(false, $22, 50, 250)], BETWEEN11=[BETWEEN(false, $13, 100, 150)], BETWEEN12=[BETWEEN(false, $13, 50, 100)], BETWEEN13=[BETWEEN(false, $13, 150, 200)]) + HiveFilter(condition=[AND(OR(<=(100, $13), <=($13, 150), IS NOT NULL($13), <=($13, 200)), OR(<=(100, $22), <=($22, 200), IS NOT NULL($22), <=($22, 300), <=($22, 250)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2001)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], 
ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_quantity=[$10], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], BETWEEN=[BETWEEN(false, $22, 100, 200)], BETWEEN9=[BETWEEN(false, $22, 150, 300)], BETWEEN10=[BETWEEN(false, $22, 50, 250)], BETWEEN11=[BETWEEN(false, $13, 100, 150)], BETWEEN12=[BETWEEN(false, $13, 50, 100)], BETWEEN13=[BETWEEN(false, $13, 150, 200)]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 100, 200), BETWEEN(false, $22, 150, 300), BETWEEN(false, $22, 50, 250)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($5), IS NOT NULL($6), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) + HiveFilter(condition=[IN($3, 3, 1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out index f304762b14..5e165e8e70 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query21.q.out @@ -69,7 +69,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) - 
HiveFilter(condition=[CASE(>($2, 0), BETWEEN(false, /(CAST($3):DOUBLE, CAST($2):DOUBLE), 6.66667E-1, 1.5E0), null)]) + HiveFilter(condition=[AND(CASE(>($2, 0), <=(6.66667E-1, /(CAST($3):DOUBLE, CAST($2):DOUBLE)), null), CASE(>($2, 0), <=(/(CAST($3):DOUBLE, CAST($2):DOUBLE), 1.5E0), null))]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) HiveProject($f0=[$1], $f1=[$10], $f2=[CASE($7, $5, 0)], $f3=[CASE($8, $5, 0)]) HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out index 6e7384e1dd..987f8d3eea 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out @@ -87,7 +87,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($5), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)))]) + HiveFilter(condition=[AND(IN($6, 2000, 2001, 2002), OR(<=(1, $9), <=($9, 3), <=(25, $9), <=($9, 28)), OR(BETWEEN(false, $9, 1, 3), BETWEEN(false, $9, 25, 28)))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'>10000', _UTF-16LE'unknown'), >($4, 0), CASE(>($4, 0), >(/(CAST($3):DOUBLE, CAST($4):DOUBLE), 1.2), null))]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out index 12d59348f7..3188d82f28 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out +++ 
ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query48.q.out @@ -141,20 +141,20 @@ POSTHOOK: Input: default@store POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: -HiveAggregate(group=[{}], agg#0=[sum($9)]) - HiveJoin(condition=[AND(=($8, $0), OR(AND($1, $10), AND($2, $11), AND($3, $12)))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveAggregate(group=[{}], agg#0=[sum($8)]) + HiveJoin(condition=[AND(=($7, $0), OR(AND($1, $9), AND($2, $10), AND($3, $11)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0]) - HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'M'), =($3, _UTF-16LE'4 yr Degree'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not 
available]) HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_addr_sk=[$6], ss_quantity=[$10], BETWEEN=[BETWEEN(false, $22, 0, 2000)], BETWEEN6=[BETWEEN(false, $22, 150, 3000)], BETWEEN7=[BETWEEN(false, $22, 50, 25000)]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), OR(BETWEEN(false, $22, 0, 2000), BETWEEN(false, $22, 150, 3000), BETWEEN(false, $22, 50, 25000)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($6), IS NOT NULL($0))]) + HiveFilter(condition=[AND(OR(<=(100, $13), <=($13, 150), IS NOT NULL($13), <=($13, 200)), OR(<=(0, $22), <=($22, 2000), IS NOT NULL($22), <=($22, 3000), <=($22, 25000)), OR(BETWEEN(false, $13, 100, 150), BETWEEN(false, $13, 50, 100), BETWEEN(false, $13, 150, 200)), IS NOT NULL($7), IS NOT NULL($4), IS NOT NULL($6), IS NOT NULL($0))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out index 6af3162b28..e33203d93c 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[278][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 15' is a cross product -Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a 
cross product +Warning: Shuffle Join MERGEJOIN[280][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain cbo with my_customers as ( select distinct c_customer_sk @@ -135,71 +135,14 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(segment=[$0], num_customers=[$1], segment_base=[*($0, 50)]) HiveAggregate(group=[{0}], agg#0=[count()]) HiveProject(segment=[CAST(/($1, CAST(50):DECIMAL(10, 0))):INTEGER]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveFilter(condition=[BETWEEN(false, $2, $3, $4)]) - HiveProject(c_customer_sk=[$0], ss_ext_sales_price=[$4], d_month_seq=[$11], _o__c0=[$13], $f0=[$14]) + HiveAggregate(group=[{5}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$4], $f1=[$5], ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_ext_sales_price=[$2], ca_address_sk=[$8], ca_county=[$9], ca_state=[$10], s_county=[$11], s_state=[$12], d_date_sk=[$6], d_month_seq=[$7], cnt=[$3], $f00=[$13]) - HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], 
ss_ext_sales_price=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) - HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[+($3, 1)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) - HiveAggregate(group=[{0, 1}]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($8, 3), =($6, 1999))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], d_month_seq=[$3]) - HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4]) - HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_county=[$23], s_state=[$24]) - HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[+($3, 1)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[true], joinType=[right], algorithm=[none], cost=[not available]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[+($3, 3)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_sales_price=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(cnt=[$0]) HiveFilter(condition=[<=(sq_count_check($0), 1)]) HiveProject(cnt=[$0]) @@ -209,4 +152,59 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[+($3, 3)]) HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cnt=[$0]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + 
HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_county=[$3], ca_state=[$4], s_county=[$5], s_state=[$6]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) + HiveAggregate(group=[{0, 1}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 3), =($6, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) + 
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(d_date_sk=[$0], d_month_seq=[$1], $f0=[$2], $f00=[$3]) + HiveJoin(condition=[<=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[<=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 3)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out index 3e89c2da75..aa4fe93d80 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query58.q.out @@ -142,7 +142,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($3, $1), $5), CAST(3):DECIMAL(10, 0))]) - HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), 
BETWEEN(false, $3, $6, $7)), BETWEEN(false, $1, $6, $7)), BETWEEN(false, $5, *(0.9, $3), *(1.1, $3))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $1)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), BETWEEN(false, $3, $6, $7)), BETWEEN(false, $1, $6, $7)), BETWEEN(false, $5, *(0.9, $3), *(1.1, $1))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $3)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(AND(=($2, $0), BETWEEN(false, $3, *(0.9, $1), *(1.1, $1))), BETWEEN(false, $1, *(0.9, $3), *(1.1, $3)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{7}], agg#0=[sum($2)]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out index 4261d0655d..75e69cd566 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out @@ -292,7 +292,7 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 36, 45))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2000)]) @@ -351,7 +351,7 @@ 
HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 36, 45))]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[=($6, 2001)]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out index 6471345141..91aad930ba 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query85.q.out @@ -184,31 +184,32 @@ CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) - HiveAggregate(group=[{1}], agg#0=[sum($30)], agg#1=[count($30)], agg#2=[sum($26)], agg#3=[count($26)], agg#4=[sum($25)], agg#5=[count($25)]) - 
HiveJoin(condition=[=($0, $23)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{1}], agg#0=[sum($5)], agg#1=[count($5)], agg#2=[sum($29)], agg#3=[count($29)], agg#4=[sum($28)], agg#5=[count($28)]) + HiveJoin(condition=[=($0, $26)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) HiveTableScan(table=[[default, reason]], table:alias=[reason]) - HiveJoin(condition=[AND(AND(AND(=($1, $15), =($2, $16)), =($0, $18)), OR(AND($3, $4, $32), AND($5, $6, $33), AND($7, $8, $34)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) - HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[AND(=($0, $10), OR(AND($1, $20), AND($2, $21), AND($3, $22)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) - HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveJoin(condition=[=($12, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 1998)]) - HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveJoin(condition=[AND(=($9, $0), =($10, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(AND(=($1, $20), =($2, $25)), OR(AND($14, $15, $7), AND($16, $17, $8), AND($18, $19, $9))), OR(AND($29, $4), AND($30, $5), AND($31, $6)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100, 200)], BETWEEN6=[BETWEEN(false, $33, 150, 300)], BETWEEN7=[BETWEEN(false, $33, 50, 250)], BETWEEN8=[BETWEEN(false, $21, 100, 150)], BETWEEN9=[BETWEEN(false, $21, 50, 100)], BETWEEN10=[BETWEEN(false, $21, 150, 200)]) + HiveFilter(condition=[AND(OR(<=(100, $21), <=($21, 150), IS NOT NULL($21), <=($21, 200)), OR(<=(100, $33), <=($33, 200), IS NOT NULL($33), <=($33, 300), <=($33, 250)), IS NOT NULL($12), IS NOT NULL($0))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 1998)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$1], cd_education_status=[$2], ==[$3], =4=[$4], =5=[$5], =6=[$6], =7=[$7], =8=[$8], wr_item_sk=[$9], wr_refunded_cdemo_sk=[$10], wr_refunded_addr_sk=[$11], wr_returning_cdemo_sk=[$12], wr_reason_sk=[$13], wr_order_number=[$14], wr_fee=[$15], wr_refunded_cash=[$16], ca_address_sk=[$17], IN=[$18], IN2=[$19], IN3=[$20], 
cd_demo_sk0=[$21], cd_marital_status0=[$22], cd_education_status0=[$23]) + HiveJoin(condition=[AND(AND(=($1, $22), =($2, $23)), =($0, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($12, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_item_sk=[$2], wr_refunded_cdemo_sk=[$4], wr_refunded_addr_sk=[$6], wr_returning_cdemo_sk=[$8], wr_reason_sk=[$12], wr_order_number=[$13], wr_fee=[$18], wr_refunded_cash=[$20]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($6), IS NOT NULL($12))]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_order_number=[$17], ws_quantity=[$18], BETWEEN=[BETWEEN(false, $33, 100, 200)], BETWEEN6=[BETWEEN(false, $33, 150, 300)], BETWEEN7=[BETWEEN(false, $33, 50, 250)], BETWEEN8=[BETWEEN(false, $21, 100, 150)], BETWEEN9=[BETWEEN(false, $21, 50, 100)], BETWEEN10=[BETWEEN(false, $21, 150, 200)]) - HiveFilter(condition=[AND(OR(BETWEEN(false, $21, 100, 150), BETWEEN(false, $21, 50, 100), BETWEEN(false, $21, 150, 200)), OR(BETWEEN(false, $33, 100, 200), BETWEEN(false, $33, 150, 300), BETWEEN(false, $33, 50, 250)), IS NOT NULL($12), IS NOT NULL($0))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', 
_UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'))]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out index ac81e3794c..616ed6bcde 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out @@ -115,13 +115,13 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 8 <- Reducer 7 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -136,80 +136,80 @@ Stage-0 <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_31] Group By Operator [GBY_30] (rows=1 width=256) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col5)","count(_col5)","sum(_col6)","count(_col6)","sum(_col7)","count(_col7)"] - Select Operator [SEL_29] (rows=40950 width=44) - Output:["_col5","_col6","_col7"] - Filter Operator [FIL_28] (rows=40950 width=44) - predicate:((_col22 and _col23 and _col11 and _col15) or (_col24 and _col25 and _col12 and _col16) or (_col26 and _col27 and _col13 and _col16)) - Merge Join Operator [MERGEJOIN_97] (rows=218403 width=44) - Conds:RS_25._col2=RS_117._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col15","_col16","_col22","_col23","_col24","_col25","_col26","_col27"] + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col6)","count(_col6)"] + Select Operator [SEL_29] (rows=368553 width=44) + Output:["_col4","_col5","_col6"] + Filter Operator [FIL_28] (rows=368553 width=44) + predicate:((_col19 and _col20 and _col10 and _col26) or (_col21 and _col22 and _col11 and _col27) or (_col23 and _col24 and _col12 and _col27)) + Merge Join Operator [MERGEJOIN_97] (rows=1965626 width=44) + 
Conds:RS_25._col2=RS_117._col0(Inner),Output:["_col4","_col5","_col6","_col10","_col11","_col12","_col19","_col20","_col21","_col22","_col23","_col24","_col26","_col27"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_116] (rows=265971 width=28) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_115] (rows=265971 width=183) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) - TableScan [TS_12] (rows=1861800 width=183) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + Select Operator [SEL_116] (rows=1309 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_115] (rows=1309 width=8) + predicate:(hd_dep_count) IN (3, 1) + TableScan [TS_12] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2 - Filter Operator [FIL_24] (rows=218403 width=44) - predicate:((_col18 and _col8) or (_col19 and _col9) or (_col20 and _col10)) - Merge Join Operator [MERGEJOIN_96] (rows=291204 width=44) - Conds:RS_21._col4=RS_114._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col16","_col18","_col19","_col20"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] - PartitionCols:_col0 - Select Operator [SEL_113] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_112] (rows=3529412 width=187) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) - TableScan [TS_9] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - 
<-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_95] (rows=3300311 width=104) - Conds:RS_18._col3=RS_111._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col16"] + Merge Join Operator [MERGEJOIN_96] (rows=10811694 width=36) + Conds:RS_22._col1=RS_114._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col10","_col11","_col12","_col19","_col20","_col21","_col22","_col23","_col24"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_114] + PartitionCols:_col0 + Select Operator [SEL_113] (rows=265971 width=28) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_112] (rows=265971 width=183) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) + TableScan [TS_9] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col1 + Filter Operator [FIL_21] (rows=10811694 width=36) + predicate:((_col15 and _col7) or (_col16 and _col8) or (_col17 and _col9)) + Merge Join Operator [MERGEJOIN_95] (rows=14415593 width=36) + Conds:RS_18._col3=RS_111._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_111] PartitionCols:_col0 - Select Operator [SEL_110] (rows=1309 width=12) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_109] (rows=1309 width=8) - predicate:(hd_dep_count) IN (3, 1) - TableScan [TS_6] (rows=7200 width=8) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] + Select Operator [SEL_110] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + 
Filter Operator [FIL_109] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) + TableScan [TS_6] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_94] (rows=18152968 width=233) - Conds:RS_100._col0=RS_108._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 1 [SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_94] (rows=163376714 width=233) + Conds:RS_108._col0=RS_100._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_100] PartitionCols:_col0 Select Operator [SEL_99] (rows=652 width=4) Output:["_col0"] Filter Operator [FIL_98] (rows=652 width=8) predicate:(d_year = 2001) - TableScan [TS_0] (rows=73049 width=8) + TableScan [TS_3] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 8 [SIMPLE_EDGE] vectorized + <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_108] PartitionCols:_col0 - Select Operator [SEL_107] (rows=50840141 width=257) + Select Operator [SEL_107] (rows=457561292 width=257) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_106] (rows=50840141 width=450) - predicate:((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and 
in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=450) + Filter Operator [FIL_106] (rows=457561292 width=450) + predicate:(((ss_net_profit >= 100) or (ss_net_profit <= 200) or ss_net_profit is not null or (ss_net_profit <= 300) or (ss_net_profit <= 250)) and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or ss_sales_price is not null or (ss_sales_price <= 200)) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=450) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 7 [BROADCAST_EDGE] vectorized + <-Reducer 8 [BROADCAST_EDGE] vectorized BROADCAST [RS_105] Group By Operator [GBY_104] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_103] Group By Operator [GBY_102] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out index 67fdc85be3..7c68a294ea 100644 --- 
ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query21.q.out @@ -83,12 +83,12 @@ Stage-0 File Output Operator [FS_91] Limit [LIM_90] (rows=100 width=216) Number of rows:100 - Select Operator [SEL_89] (rows=231983 width=216) + Select Operator [SEL_89] (rows=115991 width=216) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_88] - Filter Operator [FIL_87] (rows=231983 width=216) - predicate:CASE WHEN ((_col2 > 0L)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.666667D AND 1.5D) ELSE (null) END + Filter Operator [FIL_87] (rows=115991 width=216) + predicate:(CASE WHEN ((_col2 > 0L)) THEN (((UDFToDouble(_col3) / UDFToDouble(_col2)) <= 1.5D)) ELSE (null) END and CASE WHEN ((_col2 > 0L)) THEN ((0.666667D <= (UDFToDouble(_col3) / UDFToDouble(_col2)))) ELSE (null) END) Group By Operator [GBY_86] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out index 5b0ded736f..e2c3631141 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query34.q.out @@ -153,7 +153,7 @@ Stage-0 Select Operator [SEL_103] (rows=595 width=4) Output:["_col0"] Filter Operator [FIL_102] (rows=595 width=12) - predicate:((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002)) + predicate:(((d_dom >= 1) or (d_dom <= 3) or (d_dom >= 25) or (d_dom <= 28)) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002)) TableScan [TS_5] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] <-Map 4 [SIMPLE_EDGE] 
vectorized diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out index af9d44207d..93bafabefd 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out @@ -143,12 +143,12 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Reducer 8 (BROADCAST_EDGE) +Map 1 <- Reducer 7 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -161,13 +161,13 @@ Stage-0 <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_24] Group By Operator [GBY_23] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(_col4)"] - Select Operator [SEL_22] (rows=20247 width=24) - Output:["_col4"] - Filter Operator [FIL_21] (rows=20247 width=24) - predicate:((_col10 and _col5) or (_col11 and _col6) or (_col12 and _col7)) - Merge Join Operator [MERGEJOIN_73] (rows=26999 width=24) - Conds:RS_18._col3=RS_90._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col10","_col11","_col12"] + Output:["_col0"],aggregations:["sum(_col3)"] + Select Operator [SEL_22] (rows=170127 width=24) + Output:["_col3"] + Filter Operator [FIL_21] (rows=170127 width=24) + predicate:((_col10 and _col4) or (_col11 and _col5) or (_col12 and _col6)) + Merge Join Operator [MERGEJOIN_73] (rows=226838 width=24) + Conds:RS_18._col2=RS_90._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col10","_col11","_col12"] <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_90] PartitionCols:_col0 @@ -179,50 +179,50 @@ Stage-0 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_72] (rows=305980 width=12) - Conds:RS_15._col1=RS_79._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7"] - <-Map 7 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_79] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_72] (rows=2570826 width=12) + Conds:RS_15._col1=RS_87._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_78] (rows=652 width=4) + Select Operator [SEL_86] (rows=29552 width=4) Output:["_col0"] - Filter Operator [FIL_77] (rows=652 width=8) - predicate:(d_year = 1998) - TableScan [TS_6] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + Filter Operator [FIL_85] (rows=29552 width=183) + predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M')) + TableScan [TS_6] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_71] (rows=856943 width=12) - Conds:RS_76._col0=RS_87._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_76] + Merge Join Operator [MERGEJOIN_71] (rows=57024544 width=22) + Conds:RS_84._col0=RS_76._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_76] PartitionCols:_col0 - Select Operator [SEL_75] (rows=29552 width=4) + Select Operator [SEL_75] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_74] (rows=29552 width=183) - predicate:((cd_education_status = '4 yr Degree') 
and (cd_marital_status = 'M')) - TableScan [TS_0] (rows=1861800 width=183) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_87] - PartitionCols:_col1 - Select Operator [SEL_86] (rows=53235296 width=27) + Filter Operator [FIL_74] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_84] + PartitionCols:_col0 + Select Operator [SEL_83] (rows=159705893 width=27) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_85] (rows=53235296 width=233) - predicate:((ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=233) + Filter Operator [FIL_82] (rows=159705893 width=233) + predicate:(((ss_net_profit >= 0) or (ss_net_profit <= 2000) or ss_net_profit is not null or (ss_net_profit <= 3000) or (ss_net_profit <= 25000)) and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or ss_sales_price is not null or (ss_sales_price <= 200)) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_84] - Group By Operator [GBY_83] (rows=1 width=12) + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_81] + Group By Operator [GBY_80] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_82] - Group By Operator [GBY_81] (rows=1 width=12) + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_79] + Group By Operator [GBY_78] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_80] (rows=652 width=4) + Select Operator [SEL_77] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_78] + Please refer to the previous Select Operator [SEL_75] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out index 8d10899c63..526fa814b9 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[278][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[277][tables = [$hdt$_1, $hdt$_2]] 
in Stage 'Reducer 15' is a cross product -Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[280][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[279][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 15' is a cross product +Warning: Shuffle Join MERGEJOIN[282][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -133,277 +133,289 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 18 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) -Map 24 <- Reducer 26 (BROADCAST_EDGE), Union 19 (CONTAINS) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 12 <- Map 11 (SIMPLE_EDGE) +Map 22 <- Reducer 30 (BROADCAST_EDGE), Union 23 (CONTAINS) +Map 28 <- Reducer 30 (BROADCAST_EDGE), Union 23 (CONTAINS) +Reducer 10 <- Map 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 9 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 11 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 11 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 20 <- Map 25 (SIMPLE_EDGE), Union 19 (SIMPLE_EDGE) -Reducer 21 <- Map 27 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 28 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Reducer 22 (SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) -Reducer 4 <- Map 29 (SIMPLE_EDGE), Reducer 3 
(SIMPLE_EDGE) -Reducer 5 <- Reducer 31 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 9 (SIMPLE_EDGE) +Reducer 15 <- Map 33 (CUSTOM_SIMPLE_EDGE), Reducer 14 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 9 (SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 24 <- Map 29 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE) +Reducer 25 <- Map 31 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Map 32 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_353] - Limit [LIM_352] (rows=1 width=16) + Reducer 8 vectorized + File Output Operator [FS_363] + Limit [LIM_362] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_351] (rows=1 width=16) + Select Operator [SEL_361] (rows=1 width=16) Output:["_col0","_col1","_col2"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] - Select Operator [SEL_349] (rows=1 width=16) + <-Reducer 7 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_360] + Select Operator [SEL_359] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_348] (rows=1 width=12) + Group By Operator [GBY_358] (rows=1 width=12) 
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] + <-Reducer 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_357] PartitionCols:_col0 - Group By Operator [GBY_346] (rows=1 width=12) + Group By Operator [GBY_356] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_345] (rows=1 width=116) + Select Operator [SEL_355] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_344] (rows=1 width=116) + Group By Operator [GBY_354] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_118] PartitionCols:_col0 - Group By Operator [GBY_117] (rows=312 width=116) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_116] (rows=624257222 width=127) - Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=624257222 width=127) - predicate:_col2 BETWEEN _col3 AND _col4 - Select Operator [SEL_114] (rows=5618315000 width=127) - Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_279] (rows=5618315000 width=127) - Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Merge Join Operator [MERGEJOIN_277] (rows=25 width=4) - Conds:(Right Outer),Output:["_col0"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_335] - Group By Operator [GBY_334] (rows=25 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] - PartitionCols:_col0 - Group By Operator [GBY_298] (rows=25 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_295] (rows=50 width=12) - Output:["_col0"] - Filter Operator [FIL_293] (rows=50 width=12) - predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_26] (rows=73049 width=12) - 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_343] - Select Operator [SEL_342] (rows=1 width=8) - Filter Operator [FIL_341] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_340] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Group By Operator [GBY_338] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_337] (rows=25 width=4) - Group By Operator [GBY_336] (rows=25 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] + Group By Operator [GBY_117] (rows=1 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 + Merge Join Operator [MERGEJOIN_284] (rows=998811 width=4) + Conds:RS_113._col0=RS_114._col0(Inner),Output:["_col2","_col5"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_114] + PartitionCols:_col0 + Select Operator [SEL_103] (rows=5072841 width=12) + Output:["_col0"] + Filter Operator [FIL_102] (rows=5072841 width=12) + predicate:(_col1 <= _col3) + Merge Join Operator [MERGEJOIN_282] (rows=15218525 width=12) + Conds:(Inner),Output:["_col0","_col1","_col3"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_99] + Filter Operator [FIL_98] (rows=608741 width=12) + predicate:(_col2 <= _col1) + Merge Join Operator [MERGEJOIN_279] (rows=1826225 width=12) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_351] + Select Operator [SEL_350] (rows=73049 width=8) + Output:["_col0","_col1"] + TableScan [TS_77] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_349] + Group By Operator [GBY_348] (rows=25 
width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + PartitionCols:_col0 + Group By Operator [GBY_305] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_301] (rows=50 width=12) + Output:["_col0"] + Filter Operator [FIL_298] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_353] + Group By Operator [GBY_352] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_310] + PartitionCols:_col0 + Group By Operator [GBY_306] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_302] (rows=50 width=12) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_298] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_113] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_283] (rows=8989304 width=4) + Conds:RS_110._col1=RS_111._col0(Inner),Output:["_col0","_col2","_col5"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_111] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=55046 width=4) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_281] (rows=55046 width=4) + Conds:RS_73._col0=RS_347._col1(Inner),Output:["_col5"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_275] (rows=39720279 width=4) + Conds:RS_329._col1, _col2=RS_332._col0, _col1(Inner),Output:["_col0"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_329] + PartitionCols:_col1, _col2 + Select Operator [SEL_328] (rows=40000000 width=188) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_327] (rows=40000000 width=188) + predicate:(ca_county is not null and ca_state is not null) + TableScan [TS_33] (rows=40000000 width=188) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_332] + PartitionCols:_col0, _col1 + Select Operator [SEL_331] (rows=1704 width=184) + Output:["_col0","_col1"] + Filter Operator [FIL_330] (rows=1704 width=184) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_36] (rows=1704 width=184) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] + <-Reducer 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_347] + PartitionCols:_col1 + Select Operator [SEL_346] (rows=55046 width=8) + Output:["_col0","_col1"] + Group By Operator [GBY_345] (rows=55046 width=8) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col0, _col1 + Group By Operator [GBY_66] (rows=55046 width=8) + Output:["_col0","_col1"],keys:_col6, _col5 + Merge Join Operator [MERGEJOIN_278] (rows=110092 width=8) + Conds:RS_62._col1=RS_344._col0(Inner),Output:["_col5","_col6"] + <-Map 32 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_344] + PartitionCols:_col0 + Select Operator [SEL_343] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_342] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_53] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_62] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_277] (rows=110092 width=0) + Conds:RS_59._col2=RS_341._col0(Inner),Output:["_col1"] + <-Map 31 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_341] PartitionCols:_col0 - Group By Operator [GBY_299] (rows=25 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_296] (rows=50 width=12) - Output:["_col0"] - Please refer to the previous Filter Operator [FIL_293] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Select 
Operator [SEL_107] (rows=224732600 width=119) - Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_278] (rows=224732600 width=119) - Conds:(Left Outer),Output:["_col2","_col4","_col7","_col13"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_305] - Group By Operator [GBY_303] (rows=25 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] - PartitionCols:_col0 - Group By Operator [GBY_297] (rows=25 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_294] (rows=50 width=12) - Output:["_col0"] - Please refer to the previous Filter Operator [FIL_293] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_104] - Merge Join Operator [MERGEJOIN_276] (rows=8989304 width=8) - Conds:RS_101._col5=RS_102._col0(Inner),Output:["_col2","_col4","_col7"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_102] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_275] (rows=39720279 width=4) - Conds:RS_330._col1, _col2=RS_333._col0, _col1(Inner),Output:["_col0"] - <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] - PartitionCols:_col1, _col2 - Select Operator [SEL_329] (rows=40000000 width=188) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_328] (rows=40000000 width=188) - predicate:(ca_county is not null and ca_state is not null) - TableScan [TS_74] (rows=40000000 width=188) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] - PartitionCols:_col0, _col1 - Select Operator [SEL_332] (rows=1704 width=184) - Output:["_col0","_col1"] - Filter Operator [FIL_331] (rows=1704 width=184) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_77] (rows=1704 width=184) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_101] - PartitionCols:_col5 - Merge Join 
Operator [MERGEJOIN_274] (rows=8989304 width=12) - Conds:RS_98._col0=RS_327._col0(Inner),Output:["_col2","_col4","_col5","_col7"] - <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] - PartitionCols:_col0 - Select Operator [SEL_326] (rows=73049 width=8) - Output:["_col0","_col1"] - TableScan [TS_72] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_98] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_273] (rows=8989304 width=8) - Conds:RS_95._col1=RS_325._col0(Inner),Output:["_col0","_col2","_col4","_col5"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_95] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_269] (rows=525327388 width=114) - Conds:(Inner),Output:["_col0","_col1","_col2"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_292] - Select Operator [SEL_291] (rows=525327388 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_290] (rows=525327388 width=114) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_311] - Select Operator [SEL_310] (rows=1 width=8) - Filter Operator [FIL_309] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_308] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_307] - Group By Operator [GBY_306] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_304] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_303] - <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] + Select Operator [SEL_340] (rows=453 width=4) + Output:["_col0"] + Filter 
Operator [FIL_339] (rows=453 width=186) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) + TableScan [TS_50] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_276] (rows=11665117 width=7) + Conds:Union 23._col0=RS_335._col0(Inner),Output:["_col1","_col2"] + <-Map 29 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_335] + PartitionCols:_col0 + Select Operator [SEL_334] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_333] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_47] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] vectorized + Reduce Output Operator [RS_369] + PartitionCols:_col0 + Select Operator [SEL_368] (rows=285117831 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_367] (rows=285117831 width=11) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_285] (rows=287989836 width=11) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_365] + Group By Operator [GBY_364] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_338] + Group By Operator [GBY_337] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select 
Operator [SEL_336] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_334] + <-Map 28 [CONTAINS] vectorized + Reduce Output Operator [RS_372] + PartitionCols:_col0 + Select Operator [SEL_371] (rows=143930993 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_370] (rows=143930993 width=11) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_290] (rows=144002668 width=11) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_366] + Please refer to the previous Group By Operator [GBY_364] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_110] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_280] (rows=525327388 width=114) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_326] + Select Operator [SEL_325] (rows=1 width=8) + Filter Operator [FIL_324] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_323] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_322] + Group By Operator [GBY_321] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_320] (rows=25 width=4) + Group By Operator [GBY_319] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_308] PartitionCols:_col0 - Group By Operator [GBY_324] (rows=55046 width=8) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col0, _col1 - Group By Operator [GBY_68] (rows=55046 width=8) - 
Output:["_col0","_col1"],keys:_col5, _col6 - Merge Join Operator [MERGEJOIN_272] (rows=110092 width=8) - Conds:RS_64._col1=RS_323._col0(Inner),Output:["_col5","_col6"] - <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] - PartitionCols:_col0 - Select Operator [SEL_322] (rows=80000000 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=80000000 width=8) - predicate:c_current_addr_sk is not null - TableScan [TS_55] (rows=80000000 width=8) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_64] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_271] (rows=110092 width=0) - Conds:RS_61._col2=RS_320._col0(Inner),Output:["_col1"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] - PartitionCols:_col0 - Select Operator [SEL_319] (rows=453 width=4) - Output:["_col0"] - Filter Operator [FIL_318] (rows=453 width=186) - predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) - TableScan [TS_52] (rows=462000 width=186) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_270] (rows=11665117 width=7) - Conds:Union 19._col0=RS_314._col0(Inner),Output:["_col1","_col2"] - <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_314] - PartitionCols:_col0 - Select Operator [SEL_313] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_312] (rows=50 width=12) - predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_49] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 19 [SIMPLE_EDGE] - <-Map 18 [CONTAINS] vectorized - Reduce Output Operator [RS_359] - PartitionCols:_col0 - Select Operator [SEL_358] (rows=285117831 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_357] (rows=285117831 width=11) - 
predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_280] (rows=287989836 width=11) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_355] - Group By Operator [GBY_354] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_317] - Group By Operator [GBY_316] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_315] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_313] - <-Map 24 [CONTAINS] vectorized - Reduce Output Operator [RS_362] - PartitionCols:_col0 - Select Operator [SEL_361] (rows=143930993 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_360] (rows=143930993 width=11) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_59_date_dim_d_date_sk_min) AND DynamicValue(RS_59_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_59_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_285] (rows=144002668 width=11) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_356] - Please refer to the previous Group By Operator [GBY_354] + Group By Operator [GBY_304] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_300] (rows=50 width=12) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_298] + <-Reducer 2 
[CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_107] + Merge Join Operator [MERGEJOIN_274] (rows=525327388 width=114) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_297] + Select Operator [SEL_296] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_295] (rows=525327388 width=114) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_318] + Select Operator [SEL_317] (rows=1 width=8) + Filter Operator [FIL_316] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_315] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_314] + Group By Operator [GBY_313] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_312] (rows=25 width=4) + Group By Operator [GBY_311] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_307] + PartitionCols:_col0 + Group By Operator [GBY_303] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_299] (rows=50 width=12) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_298] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out index daeda3fdc6..09f5c9b9bd 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query58.q.out @@ -181,7 +181,7 @@ Stage-0 Select Operator [SEL_156] (rows=1 width=884) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Filter Operator [FIL_152] (rows=1 width=660) - predicate:(_col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7 and _col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3)) + predicate:(_col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7 and _col5 BETWEEN (0.9 * _col1) AND (1.1 * _col3) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col1)) Merge Join Operator [MERGEJOIN_422] (rows=384 width=660) Conds:RS_149._col0=RS_467._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out index 3aee07ad68..30f9b6bb2b 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out @@ -265,22 +265,22 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 20 (BROADCAST_EDGE) +Map 1 <- Reducer 48 (BROADCAST_EDGE) Map 38 <- Reducer 20 (BROADCAST_EDGE) -Map 50 <- Reducer 36 (BROADCAST_EDGE) -Map 51 <- Reducer 36 (BROADCAST_EDGE) +Map 52 <- Reducer 49 (BROADCAST_EDGE) +Map 53 <- Reducer 36 (BROADCAST_EDGE) Reducer 10 <- Map 47 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Map 47 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 48 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 48 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Map 49 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Map 49 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 12 <- Map 50 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 50 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 51 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 51 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (SIMPLE_EDGE) Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) -Reducer 21 <- Map 19 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE) +Reducer 21 <- Map 19 (SIMPLE_EDGE), Map 52 (SIMPLE_EDGE) Reducer 22 <- Map 47 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Map 37 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Reducer 43 (ONE_TO_ONE_EDGE) @@ -291,17 +291,19 @@ Reducer 28 <- Map 46 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) Reducer 29 <- Map 47 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) Reducer 3 <- Map 47 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 30 <- Map 47 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) -Reducer 31 <- Map 48 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) -Reducer 32 <- Map 48 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 33 <- Map 49 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) -Reducer 34 <- Map 49 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) +Reducer 31 <- Map 50 
(SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 50 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Map 51 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Map 51 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) Reducer 35 <- Reducer 34 (SIMPLE_EDGE) Reducer 36 <- Map 19 (CUSTOM_SIMPLE_EDGE) Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) Reducer 4 <- Map 37 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 40 <- Reducer 39 (SIMPLE_EDGE) -Reducer 42 <- Map 41 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) +Reducer 42 <- Map 41 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) Reducer 43 <- Reducer 42 (SIMPLE_EDGE) +Reducer 48 <- Map 47 (CUSTOM_SIMPLE_EDGE) +Reducer 49 <- Map 47 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 40 (ONE_TO_ONE_EDGE) Reducer 6 <- Map 44 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Map 45 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) @@ -313,216 +315,216 @@ Stage-0 limit:-1 Stage-1 Reducer 18 vectorized - File Output Operator [FS_1061] - Select Operator [SEL_1060] (rows=1991254249 width=1702) + File Output Operator [FS_1069] + Select Operator [SEL_1068] (rows=104583667777 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_197] - Select Operator [SEL_196] (rows=1991254249 width=1694) + Select Operator [SEL_196] (rows=104583667777 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_195] (rows=1991254249 width=1694) + Filter Operator [FIL_195] (rows=104583667777 width=1694) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_973] (rows=5973762748 width=1694) - Conds:RS_1043._col2, _col1, _col3=RS_1059._col1, _col0, 
_col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + Merge Join Operator [MERGEJOIN_973] (rows=313751003333 width=1694) + Conds:RS_1050._col2, _col1, _col3=RS_1067._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1043] + SHUFFLE [RS_1050] PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_1042] (rows=2364623 width=1354) + Select Operator [SEL_1049] (rows=21299858 width=1354) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Group By Operator [GBY_1041] (rows=2364623 width=1362) + Group By Operator [GBY_1048] (rows=21299858 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_93] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_92] (rows=2364623 width=1362) + Group By Operator [GBY_92] (rows=21299858 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col8)","sum(_col9)","sum(_col10)"],keys:_col24, _col11, _col25, _col12, _col29, _col31, _col37, _col38, _col39, _col40, _col42, _col43, _col44, 
_col45 - Merge Join Operator [MERGEJOIN_957] (rows=2364623 width=1155) - Conds:RS_88._col17=RS_1038._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1038] + Merge Join Operator [MERGEJOIN_957] (rows=21299858 width=1155) + Conds:RS_88._col17=RS_1045._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1045] PartitionCols:_col0 - Select Operator [SEL_1036] (rows=40000000 width=365) + Select Operator [SEL_1043] (rows=40000000 width=365) Output:["_col0","_col1","_col2","_col3","_col4"] TableScan [TS_44] (rows=40000000 width=365) default@customer_address,ad1,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_88] PartitionCols:_col17 - Merge Join Operator [MERGEJOIN_956] (rows=2364623 width=798) - Conds:RS_85._col5=RS_1037._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1037] + Merge Join Operator [MERGEJOIN_956] (rows=21299858 width=798) + Conds:RS_85._col5=RS_1044._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1044] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1036] + Please refer to the previous Select Operator [SEL_1043] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_85] PartitionCols:_col5 - Filter Operator [FIL_84] (rows=2364623 width=609) + Filter Operator [FIL_84] (rows=21299858 width=609) predicate:(_col33 <> _col35) - 
Merge Join Operator [MERGEJOIN_955] (rows=2364623 width=609) - Conds:RS_81._col15=RS_1033._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col33","_col35"] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1033] + Merge Join Operator [MERGEJOIN_955] (rows=21299858 width=609) + Conds:RS_81._col15=RS_1040._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col33","_col35"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1040] PartitionCols:_col0 - Select Operator [SEL_1031] (rows=1861800 width=89) + Select Operator [SEL_1038] (rows=1861800 width=89) Output:["_col0","_col1"] TableScan [TS_40] (rows=1861800 width=89) default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_81] PartitionCols:_col15 - Merge Join Operator [MERGEJOIN_954] (rows=2331651 width=523) - Conds:RS_78._col3=RS_1032._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31","_col33"] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1032] + Merge Join Operator [MERGEJOIN_954] (rows=21002853 width=525) + Conds:RS_78._col3=RS_1039._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31","_col33"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1039] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1031] + Please refer to the previous Select Operator [SEL_1038] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_78] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_953] (rows=2299139 width=436) - Conds:RS_75._col18=RS_996._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31"] + Merge Join Operator [MERGEJOIN_953] (rows=20709989 width=438) + 
Conds:RS_75._col18=RS_980._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31"] <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_996] + PARTITION_ONLY_SHUFFLE [RS_980] PartitionCols:_col0 - Select Operator [SEL_990] (rows=73049 width=8) + Select Operator [SEL_974] (rows=73049 width=8) Output:["_col0","_col1"] TableScan [TS_38] (rows=73049 width=8) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_75] PartitionCols:_col18 - Merge Join Operator [MERGEJOIN_952] (rows=2299139 width=434) - Conds:RS_72._col19=RS_998._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col24","_col25","_col29"] + Merge Join Operator [MERGEJOIN_952] (rows=20709989 width=438) + Conds:RS_72._col19=RS_982._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col24","_col25","_col29"] <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_998] + PARTITION_ONLY_SHUFFLE [RS_982] PartitionCols:_col0 - Select Operator [SEL_992] (rows=73049 width=8) + Select Operator [SEL_976] (rows=73049 width=8) Output:["_col0","_col1"] Please refer to the previous TableScan [TS_38] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_72] PartitionCols:_col19 - Merge Join Operator [MERGEJOIN_951] (rows=2299139 width=432) - Conds:RS_69._col16=RS_1028._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col19","_col24","_col25"] + Merge Join Operator [MERGEJOIN_951] (rows=20709989 width=437) + Conds:RS_69._col16=RS_1035._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col19","_col24","_col25"] <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1028] + SHUFFLE [RS_1035] PartitionCols:_col0 - Select Operator [SEL_1026] (rows=7200 width=4) + Select Operator [SEL_1033] 
(rows=7200 width=4) Output:["_col0"] - Filter Operator [FIL_1025] (rows=7200 width=8) + Filter Operator [FIL_1032] (rows=7200 width=8) predicate:hd_income_band_sk is not null TableScan [TS_30] (rows=7200 width=8) default@household_demographics,hd1,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col16 - Merge Join Operator [MERGEJOIN_950] (rows=2299139 width=433) - Conds:RS_66._col4=RS_1027._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] + Merge Join Operator [MERGEJOIN_950] (rows=20709989 width=441) + Conds:RS_66._col4=RS_1034._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1027] + SHUFFLE [RS_1034] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1026] + Please refer to the previous Select Operator [SEL_1033] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_949] (rows=2299139 width=435) - Conds:RS_63._col6=RS_1023._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] + Merge Join Operator [MERGEJOIN_949] (rows=20709989 width=443) + Conds:RS_63._col6=RS_1030._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] <-Map 45 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1023] + SHUFFLE [RS_1030] PartitionCols:_col0 - Select Operator [SEL_1022] (rows=1704 width=181) + Select Operator [SEL_1029] (rows=1704 width=181) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1021] (rows=1704 width=181) + Filter Operator [FIL_1028] (rows=1704 width=181) predicate:(s_store_name is not null and s_zip is not null) 
TableScan [TS_27] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_948] (rows=2299139 width=259) - Conds:RS_60._col1, _col7=RS_1019._col0, _col1(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_948] (rows=20709989 width=267) + Conds:RS_60._col1, _col7=RS_1026._col0, _col1(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] <-Map 44 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1019] + SHUFFLE [RS_1026] PartitionCols:_col0, _col1 - Select Operator [SEL_1018] (rows=57591150 width=8) + Select Operator [SEL_1025] (rows=57591150 width=8) Output:["_col0","_col1"] TableScan [TS_25] (rows=57591150 width=8) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col1, _col7 - Merge Join Operator [MERGEJOIN_947] (rows=1394510 width=123) - Conds:RS_57._col1=RS_1017._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_947] (rows=12561347 width=135) + Conds:RS_57._col1=RS_1024._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_945] (rows=1394510 width=123) - Conds:RS_54._col2=RS_1006._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_945] (rows=12561347 
width=135) + Conds:RS_54._col2=RS_1011._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1006] + SHUFFLE [RS_1011] PartitionCols:_col0 - Select Operator [SEL_1005] (rows=69376329 width=23) + Select Operator [SEL_1010] (rows=69376329 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1004] (rows=69376329 width=23) + Filter Operator [FIL_1009] (rows=69376329 width=23) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) TableScan [TS_9] (rows=80000000 width=23) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_944] (rows=1608052 width=119) - Conds:RS_51._col0=RS_1002._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_944] (rows=14484878 width=119) + Conds:RS_51._col0=RS_986._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1002] + PARTITION_ONLY_SHUFFLE [RS_986] PartitionCols:_col0 - Select Operator [SEL_997] (rows=652 width=4) + Select Operator [SEL_981] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_991] (rows=652 width=8) + Filter Operator [FIL_975] (rows=652 width=8) predicate:(d_year = 2000) Please refer to the previous TableScan [TS_38] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_51] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_943] (rows=4503592 width=119) - 
Conds:RS_989._col1=RS_976._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_943] (rows=40567099 width=312) + Conds:RS_998._col1=RS_1001._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_976] + PARTITION_ONLY_SHUFFLE [RS_1001] PartitionCols:_col0 - Select Operator [SEL_975] (rows=518 width=111) + Select Operator [SEL_1000] (rows=4666 width=111) Output:["_col0","_col1"] - Filter Operator [FIL_974] (rows=518 width=312) - predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50) + Filter Operator [FIL_999] (rows=4666 width=311) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 36 AND 45) TableScan [TS_3] (rows=462000 width=311) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_989] + SHUFFLE [RS_998] PartitionCols:_col1 - Select Operator [SEL_988] (rows=417313408 width=351) + Select Operator [SEL_997] (rows=417313408 width=351) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_987] (rows=417313408 width=355) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_49_item_i_item_sk_min) AND DynamicValue(RS_49_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_49_item_i_item_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_996] (rows=417313408 width=355) + 
predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_52_d1_d_date_sk_min) AND DynamicValue(RS_52_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_52_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=355) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_985] - Group By Operator [GBY_984] (rows=1 width=12) + <-Reducer 48 [BROADCAST_EDGE] vectorized + BROADCAST [RS_995] + Group By Operator [GBY_994] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_982] - Group By Operator [GBY_980] (rows=1 width=12) + <-Map 47 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_992] + Group By Operator [GBY_990] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_977] (rows=518 width=4) + Select Operator [SEL_987] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_975] + Please refer to the previous Select Operator [SEL_981] <-Reducer 40 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_1017] + FORWARD [RS_1024] PartitionCols:_col0 - Select Operator [SEL_1016] (rows=13257 width=4) + Select Operator [SEL_1023] (rows=13257 width=4) Output:["_col0"] - Filter Operator [FIL_1015] (rows=13257 width=228) + Filter Operator [FIL_1022] (rows=13257 width=228) predicate:(_col1 
> (2 * _col2)) - Group By Operator [GBY_1014] (rows=39773 width=228) + Group By Operator [GBY_1021] (rows=39773 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 39 [SIMPLE_EDGE] SHUFFLE [RS_21] @@ -530,195 +532,203 @@ Stage-0 Group By Operator [GBY_20] (rows=6482999 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 Merge Join Operator [MERGEJOIN_946] (rows=183085709 width=227) - Conds:RS_1010._col0, _col1=RS_1012._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + Conds:RS_1017._col0, _col1=RS_1019._col0, _col1(Inner),Output:["_col0","_col2","_col5"] <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1012] + SHUFFLE [RS_1019] PartitionCols:_col0, _col1 - Select Operator [SEL_1011] (rows=28798881 width=120) + Select Operator [SEL_1018] (rows=28798881 width=120) Output:["_col0","_col1","_col2"] TableScan [TS_14] (rows=28798881 width=337) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1010] + SHUFFLE [RS_1017] PartitionCols:_col0, _col1 - Select Operator [SEL_1009] (rows=287989836 width=119) + Select Operator [SEL_1016] (rows=287989836 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1008] (rows=287989836 width=119) + Filter Operator [FIL_1015] (rows=287989836 width=119) predicate:(cs_item_sk BETWEEN DynamicValue(RS_49_item_i_item_sk_min) AND DynamicValue(RS_49_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_49_item_i_item_sk_bloom_filter))) TableScan [TS_12] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_986] - Please refer to the previous Group By Operator [GBY_984] + BROADCAST [RS_1014] + 
Group By Operator [GBY_1013] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1007] + Group By Operator [GBY_1005] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1002] (rows=4666 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1000] <-Reducer 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1059] + SHUFFLE [RS_1067] PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_1058] (rows=2364623 width=525) + Select Operator [SEL_1066] (rows=21299858 width=525) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_1057] (rows=2364623 width=1362) + Group By Operator [GBY_1065] (rows=21299858 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_189] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_188] (rows=2364623 width=1362) + Group By Operator [GBY_188] (rows=21299858 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col8)","sum(_col9)","sum(_col10)"],keys:_col24, _col11, _col25, _col12, _col29, _col31, _col37, _col38, _col39, _col40, _col42, _col43, _col44, _col45 - 
Merge Join Operator [MERGEJOIN_972] (rows=2364623 width=1155) - Conds:RS_184._col17=RS_1040._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1040] + Merge Join Operator [MERGEJOIN_972] (rows=21299858 width=1155) + Conds:RS_184._col17=RS_1047._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40","_col42","_col43","_col44","_col45"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1047] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1036] + Please refer to the previous Select Operator [SEL_1043] <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_184] PartitionCols:_col17 - Merge Join Operator [MERGEJOIN_971] (rows=2364623 width=798) - Conds:RS_181._col5=RS_1039._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1039] + Merge Join Operator [MERGEJOIN_971] (rows=21299858 width=798) + Conds:RS_181._col5=RS_1046._col0(Inner),Output:["_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col37","_col38","_col39","_col40"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1046] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1036] + Please refer to the previous Select Operator [SEL_1043] <-Reducer 32 [SIMPLE_EDGE] SHUFFLE [RS_181] PartitionCols:_col5 - Filter Operator [FIL_180] (rows=2364623 width=609) + Filter Operator [FIL_180] (rows=21299858 width=609) predicate:(_col33 <> _col35) - Merge Join Operator [MERGEJOIN_970] (rows=2364623 width=609) - Conds:RS_177._col15=RS_1035._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col33","_col35"] 
- <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1035] + Merge Join Operator [MERGEJOIN_970] (rows=21299858 width=609) + Conds:RS_177._col15=RS_1042._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col17","_col24","_col25","_col29","_col31","_col33","_col35"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1042] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1031] + Please refer to the previous Select Operator [SEL_1038] <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_177] PartitionCols:_col15 - Merge Join Operator [MERGEJOIN_969] (rows=2331651 width=523) - Conds:RS_174._col3=RS_1034._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31","_col33"] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1034] + Merge Join Operator [MERGEJOIN_969] (rows=21002853 width=525) + Conds:RS_174._col3=RS_1041._col0(Inner),Output:["_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31","_col33"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1041] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1031] + Please refer to the previous Select Operator [SEL_1038] <-Reducer 30 [SIMPLE_EDGE] SHUFFLE [RS_174] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_968] (rows=2299139 width=436) - Conds:RS_171._col18=RS_1000._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31"] + Merge Join Operator [MERGEJOIN_968] (rows=20709989 width=438) + Conds:RS_171._col18=RS_984._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col24","_col25","_col29","_col31"] <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1000] + PARTITION_ONLY_SHUFFLE [RS_984] PartitionCols:_col0 - Select Operator [SEL_994] (rows=73049 width=8) + Select Operator [SEL_978] (rows=73049 width=8) Output:["_col0","_col1"] 
Please refer to the previous TableScan [TS_38] <-Reducer 29 [SIMPLE_EDGE] SHUFFLE [RS_171] PartitionCols:_col18 - Merge Join Operator [MERGEJOIN_967] (rows=2299139 width=434) - Conds:RS_168._col19=RS_999._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col24","_col25","_col29"] + Merge Join Operator [MERGEJOIN_967] (rows=20709989 width=438) + Conds:RS_168._col19=RS_983._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col24","_col25","_col29"] <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_999] + PARTITION_ONLY_SHUFFLE [RS_983] PartitionCols:_col0 - Select Operator [SEL_993] (rows=73049 width=8) + Select Operator [SEL_977] (rows=73049 width=8) Output:["_col0","_col1"] Please refer to the previous TableScan [TS_38] <-Reducer 28 [SIMPLE_EDGE] SHUFFLE [RS_168] PartitionCols:_col19 - Merge Join Operator [MERGEJOIN_966] (rows=2299139 width=432) - Conds:RS_165._col16=RS_1030._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col19","_col24","_col25"] + Merge Join Operator [MERGEJOIN_966] (rows=20709989 width=437) + Conds:RS_165._col16=RS_1037._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col17","_col18","_col19","_col24","_col25"] <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1030] + SHUFFLE [RS_1037] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1026] + Please refer to the previous Select Operator [SEL_1033] <-Reducer 27 [SIMPLE_EDGE] SHUFFLE [RS_165] PartitionCols:_col16 - Merge Join Operator [MERGEJOIN_965] (rows=2299139 width=433) - Conds:RS_162._col4=RS_1029._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] + Merge Join Operator [MERGEJOIN_965] (rows=20709989 width=441) + 
Conds:RS_162._col4=RS_1036._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1029] + SHUFFLE [RS_1036] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1026] + Please refer to the previous Select Operator [SEL_1033] <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_162] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_964] (rows=2299139 width=435) - Conds:RS_159._col6=RS_1024._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] + Merge Join Operator [MERGEJOIN_964] (rows=20709989 width=443) + Conds:RS_159._col6=RS_1031._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col24","_col25"] <-Map 45 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1024] + SHUFFLE [RS_1031] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1022] + Please refer to the previous Select Operator [SEL_1029] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_159] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_963] (rows=2299139 width=259) - Conds:RS_156._col1, _col7=RS_1020._col0, _col1(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_963] (rows=20709989 width=267) + Conds:RS_156._col1, _col7=RS_1027._col0, _col1(Inner),Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] <-Map 44 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1020] + SHUFFLE [RS_1027] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1018] + Please refer to the previous Select Operator [SEL_1025] <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_156] PartitionCols:_col1, _col7 - 
Merge Join Operator [MERGEJOIN_962] (rows=1394510 width=123) - Conds:RS_153._col1=RS_1056._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_962] (rows=12561347 width=135) + Conds:RS_153._col1=RS_1064._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_153] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_960] (rows=1394510 width=123) - Conds:RS_150._col2=RS_1007._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_960] (rows=12561347 width=135) + Conds:RS_150._col2=RS_1012._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15","_col16","_col17","_col18","_col19"] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1007] + SHUFFLE [RS_1012] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1005] + Please refer to the previous Select Operator [SEL_1010] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_150] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_959] (rows=1608052 width=119) - Conds:RS_147._col0=RS_1003._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_959] (rows=14484878 width=119) + Conds:RS_147._col0=RS_988._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1003] + PARTITION_ONLY_SHUFFLE [RS_988] PartitionCols:_col0 - Select Operator [SEL_1001] (rows=652 width=4) + Select Operator [SEL_985] (rows=652 width=4) Output:["_col0"] - Filter 
Operator [FIL_995] (rows=652 width=8) + Filter Operator [FIL_979] (rows=652 width=8) predicate:(d_year = 2001) Please refer to the previous TableScan [TS_38] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_147] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_958] (rows=4503592 width=119) - Conds:RS_1049._col1=RS_978._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_958] (rows=40567099 width=312) + Conds:RS_1055._col1=RS_1003._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_978] + PARTITION_ONLY_SHUFFLE [RS_1003] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_975] - <-Map 50 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1049] + Please refer to the previous Select Operator [SEL_1000] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1055] PartitionCols:_col1 - Select Operator [SEL_1048] (rows=417313408 width=351) + Select Operator [SEL_1054] (rows=417313408 width=351) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_1047] (rows=417313408 width=355) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_145_item_i_item_sk_min) AND DynamicValue(RS_145_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_145_item_i_item_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + Filter Operator [FIL_1053] (rows=417313408 width=355) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_148_d1_d_date_sk_min) AND DynamicValue(RS_148_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_148_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and 
ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_96] (rows=575995635 width=355) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1045] - Group By Operator [GBY_1044] (rows=1 width=12) + <-Reducer 49 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1052] + Group By Operator [GBY_1051] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_983] - Group By Operator [GBY_981] (rows=1 width=12) + <-Map 47 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_993] + Group By Operator [GBY_991] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_979] (rows=518 width=4) + Select Operator [SEL_989] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_975] + Please refer to the previous Select Operator [SEL_985] <-Reducer 43 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_1056] + FORWARD [RS_1064] PartitionCols:_col0 - Select Operator [SEL_1055] (rows=13257 width=4) + Select Operator [SEL_1063] (rows=13257 width=4) Output:["_col0"] - Filter Operator [FIL_1054] (rows=13257 width=228) + Filter Operator [FIL_1062] (rows=13257 width=228) predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1053] (rows=39773 width=228) + Group By Operator [GBY_1061] (rows=39773 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 
<-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_117] @@ -726,21 +736,29 @@ Stage-0 Group By Operator [GBY_116] (rows=6482999 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 Merge Join Operator [MERGEJOIN_961] (rows=183085709 width=227) - Conds:RS_1052._col0, _col1=RS_1013._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + Conds:RS_1060._col0, _col1=RS_1020._col0, _col1(Inner),Output:["_col0","_col2","_col5"] <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1013] + SHUFFLE [RS_1020] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1011] - <-Map 51 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1052] + Please refer to the previous Select Operator [SEL_1018] + <-Map 53 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1060] PartitionCols:_col0, _col1 - Select Operator [SEL_1051] (rows=287989836 width=119) + Select Operator [SEL_1059] (rows=287989836 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1050] (rows=287989836 width=119) + Filter Operator [FIL_1058] (rows=287989836 width=119) predicate:(cs_item_sk BETWEEN DynamicValue(RS_145_item_i_item_sk_min) AND DynamicValue(RS_145_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_145_item_i_item_sk_bloom_filter))) TableScan [TS_108] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1046] - Please refer to the previous Group By Operator [GBY_1044] + BROADCAST [RS_1057] + Group By Operator [GBY_1056] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1008] + Group By Operator [GBY_1006] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] + Select Operator [SEL_1004] (rows=4666 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1000] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out index 2aa27db1ef..aeb33836cd 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out @@ -183,136 +183,136 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 10 <- Reducer 12 (BROADCAST_EDGE) -Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 15 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_209] - Limit [LIM_208] (rows=72 width=832) + Reducer 6 vectorized + File Output Operator [FS_207] + Limit [LIM_206] (rows=72 width=832) Number of rows:100 - Select Operator [SEL_207] (rows=72 width=832) + Select Operator [SEL_205] (rows=72 width=832) 
Output:["_col0","_col1","_col2","_col3"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_206] - Select Operator [SEL_205] (rows=72 width=832) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_204] + Select Operator [SEL_203] (rows=72 width=832) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_204] (rows=72 width=353) + Group By Operator [GBY_202] (rows=72 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_42] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_43] PartitionCols:_col0 - Group By Operator [GBY_41] (rows=72 width=353) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col11)","count(_col11)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col36 - Merge Join Operator [MERGEJOIN_179] (rows=16740 width=100) - Conds:RS_37._col4=RS_203._col0(Inner),Output:["_col6","_col7","_col11","_col36"] + Group By Operator [GBY_42] (rows=288 width=353) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col27)","count(_col27)","sum(_col26)","count(_col26)"],keys:_col36 + Merge Join Operator [MERGEJOIN_177] (rows=2912400 width=313) + Conds:RS_38._col24=RS_201._col0(Inner),Output:["_col3","_col26","_col27","_col36"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_202] (rows=72 width=101) + Select Operator [SEL_200] (rows=72 width=101) Output:["_col0","_col1"] - TableScan [TS_18] (rows=72 width=101) + TableScan [TS_28] (rows=72 width=101) default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col4 - Filter Operator [FIL_36] (rows=16740 width=39) - predicate:((_col29 and 
_col30 and _col15) or (_col31 and _col32 and _col16) or (_col33 and _col34 and _col17)) - Merge Join Operator [MERGEJOIN_178] (rows=44640 width=39) - Conds:RS_33._col20, _col1, _col19=RS_197._col2, _col0, _col1(Inner),Output:["_col4","_col6","_col7","_col11","_col15","_col16","_col17","_col29","_col30","_col31","_col32","_col33","_col34"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_197] - PartitionCols:_col2, _col0, _col1 - Select Operator [SEL_195] (rows=265971 width=207) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_194] (rows=265971 width=183) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) - TableScan [TS_15] (rows=1861800 width=183) - default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col20, _col1, _col19 - Filter Operator [FIL_32] (rows=44640 width=218) - predicate:((_col23 and _col12) or (_col24 and _col13) or (_col25 and _col14)) - Merge Join Operator [MERGEJOIN_177] (rows=59520 width=218) - Conds:RS_29._col2=RS_201._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col19","_col20","_col23","_col24","_col25"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] - PartitionCols:_col0 - Select Operator [SEL_200] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_199] (rows=3529412 width=187) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) - TableScan [TS_12] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_176] 
(rows=674551 width=254) - Conds:RS_26._col8=RS_185._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col19","_col20"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col24 + Filter Operator [FIL_36] (rows=2912400 width=280) + predicate:(((_col14 and _col15 and _col7) or (_col16 and _col17 and _col8) or (_col18 and _col19 and _col9)) and ((_col29 and _col4) or (_col30 and _col5) or (_col31 and _col6))) + Merge Join Operator [MERGEJOIN_176] (rows=10355208 width=280) + Conds:RS_33._col1, _col2=RS_34._col9, _col14(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col14","_col15","_col16","_col17","_col18","_col19","_col24","_col26","_col27","_col29","_col30","_col31"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col9, _col14 + Select Operator [SEL_27] (rows=1056644 width=155) + Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col15","_col16","_col18","_col19","_col20"] + Merge Join Operator [MERGEJOIN_175] (rows=1056644 width=155) + Conds:RS_24._col1, _col13, _col14=RS_198._col0, _col1, _col2(Inner),Output:["_col0","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col18","_col19","_col20","_col21","_col22","_col23"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_198] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_196] (rows=265971 width=207) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_195] (rows=265971 width=183) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) + TableScan [TS_15] (rows=1861800 width=183) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_24] + 
PartitionCols:_col1, _col13, _col14 + Merge Join Operator [MERGEJOIN_174] (rows=1056644 width=312) + Conds:RS_21._col3=RS_199._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col13","_col14"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_199] PartitionCols:_col0 - Select Operator [SEL_184] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_183] (rows=652 width=8) - predicate:(d_year = 1998) - TableScan [TS_9] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_175] (rows=1889180 width=379) - Conds:RS_23._col3=RS_198._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col19","_col20"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + Select Operator [SEL_197] (rows=265971 width=183) + Output:["_col0","_col1","_col2"] + Please refer to the previous Filter Operator [FIL_195] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_173] (rows=1056644 width=135) + Conds:RS_191._col2=RS_194._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_196] (rows=265971 width=183) - Output:["_col0","_col1","_col2"] - Please refer to the previous Filter Operator [FIL_194] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_23] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_174] (rows=13039884 width=262) - Conds:RS_182._col0, _col5=RS_193._col1, _col2(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_182] - PartitionCols:_col0, _col5 - Select Operator [SEL_181] 
(rows=11975292 width=237) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_180] (rows=11975292 width=237) - predicate:(wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_0] (rows=14398467 width=237) - default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] - PartitionCols:_col1, _col2 - Select Operator [SEL_192] (rows=15992347 width=39) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_191] (rows=15992347 width=243) - predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_27_date_dim_d_date_sk_min) AND DynamicValue(RS_27_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_27_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_3] (rows=144002668 width=243) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_190] - Group By Operator [GBY_189] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] - Group By Operator [GBY_187] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_186] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_184] + Select Operator [SEL_193] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_192] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV')) + TableScan [TS_9] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_191] + PartitionCols:_col2 + Select Operator [SEL_190] (rows=11975292 width=237) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_189] (rows=11975292 width=237) + predicate:(wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) + TableScan [TS_6] (rows=14398467 width=237) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_172] (rows=51392014 width=35) + Conds:RS_188._col0=RS_180._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_180] + PartitionCols:_col0 + Select Operator [SEL_179] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_178] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + 
SHUFFLE [RS_188] + PartitionCols:_col0 + Select Operator [SEL_187] (rows=143931136 width=39) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_186] (rows=143931136 width=243) + predicate:(((ws_net_profit >= 100) or (ws_net_profit <= 200) or ws_net_profit is not null or (ws_net_profit <= 300) or (ws_net_profit <= 250)) and ((ws_sales_price >= 100) or (ws_sales_price <= 150) or ws_sales_price is not null or (ws_sales_price <= 200)) and (ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_0] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_185] + Group By Operator [GBY_184] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_183] + Group By Operator [GBY_182] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_181] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_179] diff --git ql/src/test/results/clientpositive/perf/tez/query13.q.out ql/src/test/results/clientpositive/perf/tez/query13.q.out index 2cf833bbf1..60a73d7250 100644 --- ql/src/test/results/clientpositive/perf/tez/query13.q.out +++ ql/src/test/results/clientpositive/perf/tez/query13.q.out @@ -115,14 +115,14 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 9 <- Reducer 8 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -137,98 +137,96 @@ Stage-0 <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_37] Group By Operator [GBY_36] (rows=1 width=256) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)"] - Select Operator [SEL_35] (rows=40950 width=44) - Output:["_col6","_col7","_col8"] - Filter Operator [FIL_34] (rows=40950 width=44) - predicate:((_col24 and _col25 and _col12 and _col17) or (_col26 and _col27 and _col13 and _col18) or (_col28 and _col29 and _col14 and _col18)) - Merge Join Operator [MERGEJOIN_121] (rows=218403 width=44) - Conds:RS_31._col2=RS_144._col0(Inner),Output:["_col6","_col7","_col8","_col12","_col13","_col14","_col17","_col18","_col24","_col25","_col26","_col27","_col28","_col29"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] - PartitionCols:_col0 - Select Operator [SEL_143] (rows=265971 width=28) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_142] (rows=265971 width=183) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=183) - 
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col2 - Filter Operator [FIL_30] (rows=218403 width=44) - predicate:((_col20 and _col9) or (_col21 and _col10) or (_col22 and _col11)) - Merge Join Operator [MERGEJOIN_120] (rows=291204 width=44) - Conds:RS_27._col4=RS_141._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col17","_col18","_col20","_col21","_col22"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col5)","count(_col5)","sum(_col6)","count(_col6)","sum(_col7)","count(_col7)"] + Merge Join Operator [MERGEJOIN_121] (rows=368553 width=0) + Conds:RS_32._col4=RS_144._col0(Inner),Output:["_col5","_col6","_col7"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_144] + PartitionCols:_col0 + Select Operator [SEL_143] (rows=1704 width=4) + Output:["_col0"] + Filter Operator [FIL_142] (rows=1704 width=4) + predicate:s_store_sk is not null + TableScan [TS_15] (rows=1704 width=4) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col4 + Filter Operator [FIL_31] (rows=368553 width=44) + predicate:((_col20 and _col21 and _col11 and _col27) or (_col22 and _col23 and _col12 and _col28) or (_col24 and _col25 and _col13 and _col28)) + Merge Join Operator [MERGEJOIN_120] (rows=1965626 width=44) + Conds:RS_28._col2=RS_141._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col20","_col21","_col22","_col23","_col24","_col25","_col27","_col28"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_141] + PartitionCols:_col0 + Select Operator [SEL_140] (rows=1309 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_139] (rows=1309 width=8) + 
predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) + TableScan [TS_12] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_119] (rows=10811694 width=36) + Conds:RS_25._col1=RS_138._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col11","_col12","_col13","_col20","_col21","_col22","_col23","_col24","_col25"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_140] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_139] (rows=3529412 width=187) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_119] (rows=3300311 width=104) - Conds:RS_24._col3=RS_138._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col17","_col18"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] - PartitionCols:_col0 - Select Operator [SEL_137] (rows=1309 width=12) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_136] (rows=1309 width=8) - predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=8) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_118] (rows=18152968 width=233) - 
Conds:RS_21._col5=RS_135._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] - PartitionCols:_col0 - Select Operator [SEL_134] (rows=1704 width=4) - Output:["_col0"] - Filter Operator [FIL_133] (rows=1704 width=4) - predicate:s_store_sk is not null - TableScan [TS_6] (rows=1704 width=4) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_117] (rows=18152968 width=237) - Conds:RS_124._col0=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] - PartitionCols:_col0 - Select Operator [SEL_123] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_122] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_0] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - PartitionCols:_col0 - Select Operator [SEL_131] (rows=50840141 width=260) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_130] (rows=50840141 width=450) - predicate:((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_18_date_dim_d_date_sk_min) AND DynamicValue(RS_18_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_18_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not 
null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=450) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_125] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_123] + Select Operator [SEL_137] (rows=265971 width=28) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_136] (rows=265971 width=183) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_9] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col1 + Filter Operator [FIL_24] (rows=10811694 width=36) + predicate:((_col16 and _col8) or (_col17 and _col9) or (_col18 and _col10)) + Merge Join Operator [MERGEJOIN_118] (rows=14415593 width=36) + Conds:RS_21._col3=RS_135._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col16","_col17","_col18"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] + PartitionCols:_col0 + 
Select Operator [SEL_134] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_133] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_117] (rows=163376714 width=237) + Conds:RS_132._col0=RS_124._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_122] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Select Operator [SEL_131] (rows=457561292 width=260) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Filter Operator [FIL_130] (rows=457561292 width=450) + predicate:(((ss_net_profit >= 100) or (ss_net_profit <= 200) or ss_net_profit is not null or (ss_net_profit <= 300) or (ss_net_profit <= 250)) and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or ss_sales_price is not null or (ss_sales_price <= 200)) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null 
and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=450) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_129] + Group By Operator [GBY_128] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_125] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_123] diff --git ql/src/test/results/clientpositive/perf/tez/query21.q.out ql/src/test/results/clientpositive/perf/tez/query21.q.out index 382775a8cc..0d10843009 100644 --- ql/src/test/results/clientpositive/perf/tez/query21.q.out +++ ql/src/test/results/clientpositive/perf/tez/query21.q.out @@ -83,12 +83,12 @@ Stage-0 File Output Operator [FS_95] Limit [LIM_94] (rows=100 width=216) Number of rows:100 - Select Operator [SEL_93] (rows=231983 width=216) + Select Operator [SEL_93] (rows=115991 width=216) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_92] - Filter Operator [FIL_91] (rows=231983 width=216) - predicate:CASE WHEN ((_col2 > 0L)) THEN ((UDFToDouble(_col3) / UDFToDouble(_col2)) BETWEEN 0.666667D AND 1.5D) ELSE (null) END + Filter Operator [FIL_91] (rows=115991 width=216) + predicate:(CASE WHEN ((_col2 > 0L)) THEN (((UDFToDouble(_col3) / UDFToDouble(_col2)) <= 1.5D)) ELSE (null) END and CASE WHEN ((_col2 > 0L)) THEN ((0.666667D <= (UDFToDouble(_col3) / UDFToDouble(_col2)))) ELSE 
(null) END) Group By Operator [GBY_90] (rows=463966 width=216) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/tez/query34.q.out ql/src/test/results/clientpositive/perf/tez/query34.q.out index 4640899c73..d401e4cdc0 100644 --- ql/src/test/results/clientpositive/perf/tez/query34.q.out +++ ql/src/test/results/clientpositive/perf/tez/query34.q.out @@ -155,7 +155,7 @@ Stage-0 Select Operator [SEL_105] (rows=595 width=4) Output:["_col0"] Filter Operator [FIL_104] (rows=595 width=12) - predicate:((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) + predicate:(((d_dom >= 1) or (d_dom <= 3) or (d_dom >= 25) or (d_dom <= 28)) and (d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_dom"] <-Map 4 [SIMPLE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query48.q.out ql/src/test/results/clientpositive/perf/tez/query48.q.out index 885be1371f..d64eec1c74 100644 --- ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,13 +143,13 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 7 <- Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -162,82 +162,80 @@ Stage-0 <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(_col5)"] - Select Operator [SEL_28] (rows=20247 width=24) - Output:["_col5"] - Filter Operator [FIL_27] (rows=20247 width=24) - predicate:((_col12 and _col6) or (_col13 and _col7) or (_col14 and _col8)) - Merge Join Operator [MERGEJOIN_96] (rows=26999 width=24) - Conds:RS_24._col3=RS_116._col0(Inner),Output:["_col5","_col6","_col7","_col8","_col12","_col13","_col14"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] - PartitionCols:_col0 - Select Operator [SEL_115] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_114] (rows=3529412 width=187) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_95] (rows=305980 width=12) - Conds:RS_21._col4=RS_113._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col8"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] - PartitionCols:_col0 - Select Operator [SEL_112] (rows=1704 width=4) - Output:["_col0"] - Filter 
Operator [FIL_111] (rows=1704 width=4) - predicate:s_store_sk is not null - TableScan [TS_9] (rows=1704 width=4) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_94] (rows=305980 width=12) - Conds:RS_18._col1=RS_102._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] - PartitionCols:_col0 - Select Operator [SEL_101] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_100] (rows=652 width=8) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_93] (rows=856943 width=12) - Conds:RS_99._col0=RS_110._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_99] - PartitionCols:_col0 - Select Operator [SEL_98] (rows=29552 width=4) - Output:["_col0"] - Filter Operator [FIL_97] (rows=29552 width=183) - predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_0] (rows=1861800 width=183) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - PartitionCols:_col1 - Select Operator [SEL_109] (rows=53235296 width=31) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_108] (rows=53235296 width=233) - predicate:((ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 
150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=233) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_107] - Group By Operator [GBY_106] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_105] - Group By Operator [GBY_104] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_101] + Output:["_col0"],aggregations:["sum(_col4)"] + Merge Join Operator [MERGEJOIN_96] (rows=170127 width=0) + Conds:RS_25._col3=RS_116._col0(Inner),Output:["_col4"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_116] + PartitionCols:_col0 + Select Operator [SEL_115] (rows=1704 width=4) + Output:["_col0"] + Filter Operator [FIL_114] (rows=1704 width=4) + predicate:s_store_sk is not null + TableScan [TS_12] (rows=1704 width=4) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col3 + Filter Operator [FIL_24] (rows=170127 width=24) + predicate:((_col11 and _col5) or (_col12 and _col6) or (_col13 and _col7)) + Merge Join Operator [MERGEJOIN_95] (rows=226838 width=24) + 
Conds:RS_21._col2=RS_113._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col11","_col12","_col13"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_113] + PartitionCols:_col0 + Select Operator [SEL_112] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_111] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_9] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_94] (rows=2570826 width=12) + Conds:RS_18._col1=RS_110._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + PartitionCols:_col0 + Select Operator [SEL_109] (rows=29552 width=4) + Output:["_col0"] + Filter Operator [FIL_108] (rows=29552 width=183) + predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) + TableScan [TS_6] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_93] (rows=57024544 width=25) + Conds:RS_107._col0=RS_99._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_99] + PartitionCols:_col0 + Select Operator [SEL_98] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_97] (rows=652 width=8) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=159705893 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_105] (rows=159705893 width=233) + predicate:(((ss_net_profit >= 0) or (ss_net_profit <= 2000) or ss_net_profit is not null or (ss_net_profit <= 3000) or (ss_net_profit <= 25000)) and ((ss_sales_price >= 100) or (ss_sales_price <= 150) or ss_sales_price is not null or (ss_sales_price <= 200)) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_102] + Group By Operator [GBY_101] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_100] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_98] diff --git ql/src/test/results/clientpositive/perf/tez/query54.q.out ql/src/test/results/clientpositive/perf/tez/query54.q.out index a029634671..a7d51017da 100644 --- 
ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -133,27 +133,28 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 15 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) -Map 21 <- Reducer 23 (BROADCAST_EDGE), Union 16 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 17 <- Map 22 (SIMPLE_EDGE), Union 16 (SIMPLE_EDGE) -Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Map 16 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) +Map 22 <- Reducer 24 (BROADCAST_EDGE), Union 17 (CONTAINS) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 26 (SIMPLE_EDGE) -Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 32 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 26 (SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 26 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE) +Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 27 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 27 (SIMPLE_EDGE) +Reducer 31 <- Reducer 30 (CUSTOM_SIMPLE_EDGE) +Reducer 
32 <- Map 27 (SIMPLE_EDGE) +Reducer 33 <- Map 27 (SIMPLE_EDGE) +Reducer 4 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 33 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -161,253 +162,260 @@ Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 9 vectorized - File Output Operator [FS_349] - Limit [LIM_348] (rows=1 width=16) + Reducer 10 vectorized + File Output Operator [FS_352] + Limit [LIM_351] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_347] (rows=1 width=16) + Select Operator [SEL_350] (rows=1 width=16) Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] - Select Operator [SEL_345] (rows=1 width=16) + <-Reducer 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_349] + Select Operator [SEL_348] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_344] (rows=1 width=12) + Group By Operator [GBY_347] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] + <-Reducer 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_346] PartitionCols:_col0 - Group By Operator [GBY_342] (rows=1 width=12) + Group By Operator [GBY_345] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_341] (rows=1 width=116) + Select Operator [SEL_344] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_340] (rows=1 width=116) + Group By Operator [GBY_343] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_119] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_118] PartitionCols:_col0 - Group By Operator [GBY_118] (rows=312 width=116) - 
Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Select Operator [SEL_117] (rows=624257222 width=127) - Output:["_col0","_col1"] - Filter Operator [FIL_116] (rows=624257222 width=127) - predicate:_col2 BETWEEN _col3 AND _col4 - Select Operator [SEL_115] (rows=5618315000 width=127) - Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_273] (rows=5618315000 width=127) - Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_112] - Merge Join Operator [MERGEJOIN_270] (rows=25 width=4) - Conds:(Right Outer),Output:["_col0"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_331] - Group By Operator [GBY_330] (rows=25 width=4) + Group By Operator [GBY_117] (rows=302 width=116) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col10 + Select Operator [SEL_116] (rows=624257216 width=123) + Output:["_col2","_col10"] + Filter Operator [FIL_115] (rows=624257216 width=123) + predicate:(_col4 <= _col15) + Merge Join Operator [MERGEJOIN_272] (rows=1872771650 width=123) + Conds:(Inner),Output:["_col2","_col4","_col10","_col15"] + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_341] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0 + Group By Operator [GBY_318] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_314] (rows=50 width=12) + Output:["_col0"] + Filter Operator [FIL_310] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_50] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_112] + Filter Operator [FIL_111] (rows=74910866 width=119) + predicate:(_col14 <= _col4) + Merge Join Operator [MERGEJOIN_271] 
(rows=224732600 width=119) + Conds:(Inner),Output:["_col2","_col4","_col10","_col14"] + <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_340] + Group By Operator [GBY_339] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] PartitionCols:_col0 - Group By Operator [GBY_316] (rows=25 width=4) + Group By Operator [GBY_317] (rows=25 width=4) Output:["_col0"],keys:_col0 Select Operator [SEL_313] (rows=50 width=12) Output:["_col0"] - Filter Operator [FIL_311] (rows=50 width=12) - predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_73] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Select Operator [SEL_338] (rows=1 width=8) - Filter Operator [FIL_337] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_336] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_335] - Group By Operator [GBY_334] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_333] (rows=25 width=4) - Group By Operator [GBY_332] (rows=25 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + Please refer to the previous Filter Operator [FIL_310] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_108] + Merge Join Operator [MERGEJOIN_270] (rows=8989304 width=8) + Conds:(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_338] + Select Operator [SEL_337] (rows=1 width=8) + Filter Operator [FIL_336] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_335] (rows=1 width=8) + 
Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_334] + Group By Operator [GBY_333] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_332] (rows=25 width=4) + Group By Operator [GBY_331] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] + PartitionCols:_col0 + Group By Operator [GBY_316] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_312] (rows=50 width=12) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_310] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_105] + Merge Join Operator [MERGEJOIN_269] (rows=8989304 width=8) + Conds:(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_330] + Select Operator [SEL_329] (rows=1 width=8) + Filter Operator [FIL_328] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_327] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_326] + Group By Operator [GBY_325] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_324] (rows=25 width=4) + Group By Operator [GBY_323] (rows=25 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] + PartitionCols:_col0 + Group By Operator [GBY_315] (rows=25 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_311] (rows=50 width=12) + Output:["_col0"] + Please refer to the previous Filter Operator [FIL_310] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_102] + Merge Join Operator [MERGEJOIN_268] (rows=8989304 width=8) + Conds:RS_99._col1=RS_100._col5(Inner),Output:["_col2","_col4","_col10"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_100] + PartitionCols:_col5 + Merge Join Operator 
[MERGEJOIN_267] (rows=55046 width=4) + Conds:RS_46._col0=RS_309._col1(Inner),Output:["_col5"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col0 - Group By Operator [GBY_317] (rows=25 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_314] (rows=50 width=12) - Output:["_col0"] - Please refer to the previous Filter Operator [FIL_311] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_113] - Select Operator [SEL_108] (rows=224732600 width=119) - Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_272] (rows=224732600 width=119) - Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_323] - Group By Operator [GBY_321] (rows=25 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] - PartitionCols:_col0 - Group By Operator [GBY_315] (rows=25 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_312] (rows=50 width=12) - Output:["_col0"] - Please refer to the previous Filter Operator [FIL_311] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_105] - Merge Join Operator [MERGEJOIN_271] (rows=8989304 width=8) - Conds:(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_329] - Select Operator [SEL_328] (rows=1 width=8) - Filter Operator [FIL_327] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_326] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_325] - Group By Operator [GBY_324] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_322] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_321] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_102] - Merge Join Operator [MERGEJOIN_269] (rows=8989304 
width=8) - Conds:RS_99._col1=RS_100._col5(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_100] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_268] (rows=55046 width=4) - Conds:RS_69._col0=RS_310._col1(Inner),Output:["_col5"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_264] (rows=39720279 width=4) - Conds:RS_292._col1, _col2=RS_295._col0, _col1(Inner),Output:["_col0"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] - PartitionCols:_col1, _col2 - Select Operator [SEL_291] (rows=40000000 width=188) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_290] (rows=40000000 width=188) - predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) - TableScan [TS_29] (rows=40000000 width=188) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] - PartitionCols:_col0, _col1 - Select Operator [SEL_294] (rows=1704 width=184) - Output:["_col0","_col1"] - Filter Operator [FIL_293] (rows=1704 width=184) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_32] (rows=1704 width=184) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] - PartitionCols:_col1 - Select Operator [SEL_309] (rows=55046 width=8) - Output:["_col0","_col1"] - Group By Operator [GBY_308] (rows=55046 width=8) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_63] + Merge Join Operator [MERGEJOIN_263] (rows=39720279 width=4) + Conds:RS_291._col1, _col2=RS_294._col0, _col1(Inner),Output:["_col0"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_291] + PartitionCols:_col1, _col2 + Select Operator [SEL_290] (rows=40000000 width=188) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_289] 
(rows=40000000 width=188) + predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) + TableScan [TS_6] (rows=40000000 width=188) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] PartitionCols:_col0, _col1 - Group By Operator [GBY_62] (rows=55046 width=8) - Output:["_col0","_col1"],keys:_col6, _col5 - Merge Join Operator [MERGEJOIN_267] (rows=110092 width=8) - Conds:RS_58._col1=RS_307._col0(Inner),Output:["_col5","_col6"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] - PartitionCols:_col0 - Select Operator [SEL_306] (rows=80000000 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_305] (rows=80000000 width=8) - predicate:(c_current_addr_sk is not null and c_customer_sk is not null) - TableScan [TS_49] (rows=80000000 width=8) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_266] (rows=110092 width=0) - Conds:RS_55._col2=RS_304._col0(Inner),Output:["_col1"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] - PartitionCols:_col0 - Select Operator [SEL_303] (rows=453 width=4) - Output:["_col0"] - Filter Operator [FIL_302] (rows=453 width=186) - predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) - TableScan [TS_46] (rows=462000 width=186) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_265] (rows=11665117 width=7) - Conds:Union 16._col0=RS_298._col0(Inner),Output:["_col1","_col2"] - <-Map 22 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_298] - PartitionCols:_col0 - Select Operator [SEL_297] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_296] 
(rows=50 width=12) - predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_43] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 16 [SIMPLE_EDGE] - <-Map 15 [CONTAINS] vectorized - Reduce Output Operator [RS_355] - PartitionCols:_col0 - Select Operator [SEL_354] (rows=285117831 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_353] (rows=285117831 width=11) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_274] (rows=287989836 width=11) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_351] - Group By Operator [GBY_350] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_301] - Group By Operator [GBY_300] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_299] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_297] - <-Map 21 [CONTAINS] vectorized - Reduce Output Operator [RS_358] + Select Operator [SEL_293] (rows=1704 width=184) + Output:["_col0","_col1"] + Filter Operator [FIL_292] (rows=1704 width=184) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_9] (rows=1704 width=184) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] + <-Reducer 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_309] + PartitionCols:_col1 + Select 
Operator [SEL_308] (rows=55046 width=8) + Output:["_col0","_col1"] + Group By Operator [GBY_307] (rows=55046 width=8) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0, _col1 + Group By Operator [GBY_39] (rows=55046 width=8) + Output:["_col0","_col1"],keys:_col6, _col5 + Merge Join Operator [MERGEJOIN_266] (rows=110092 width=8) + Conds:RS_35._col1=RS_306._col0(Inner),Output:["_col5","_col6"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] + PartitionCols:_col0 + Select Operator [SEL_305] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_304] (rows=80000000 width=8) + predicate:(c_current_addr_sk is not null and c_customer_sk is not null) + TableScan [TS_26] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_265] (rows=110092 width=0) + Conds:RS_32._col2=RS_303._col0(Inner),Output:["_col1"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_303] + PartitionCols:_col0 + Select Operator [SEL_302] (rows=453 width=4) + Output:["_col0"] + Filter Operator [FIL_301] (rows=453 width=186) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) + TableScan [TS_23] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_264] (rows=11665117 width=7) + Conds:Union 17._col0=RS_297._col0(Inner),Output:["_col1","_col2"] + <-Map 23 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_357] (rows=143930993 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_356] (rows=143930993 width=11) - predicate:((ws_sold_date_sk BETWEEN 
DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_279] (rows=144002668 width=11) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_352] - Please refer to the previous Group By Operator [GBY_350] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_99] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_263] (rows=525327388 width=114) - Conds:RS_286._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] - PartitionCols:_col0 - Select Operator [SEL_285] (rows=525327388 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_284] (rows=525327388 width=114) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] - PartitionCols:_col0 - Select Operator [SEL_288] (rows=73049 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_287] (rows=73049 width=8) - predicate:d_date_sk is not null - TableScan [TS_26] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + Select Operator [SEL_296] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_295] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_20] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] vectorized + Reduce Output Operator [RS_358] + PartitionCols:_col0 + Select 
Operator [SEL_357] (rows=285117831 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_356] (rows=285117831 width=11) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_273] (rows=287989836 width=11) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_354] + Group By Operator [GBY_353] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_300] + Group By Operator [GBY_299] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_298] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_296] + <-Map 22 [CONTAINS] vectorized + Reduce Output Operator [RS_361] + PartitionCols:_col0 + Select Operator [SEL_360] (rows=143930993 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_359] (rows=143930993 width=11) + predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_278] (rows=144002668 width=11) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_355] + Please refer to the previous Group By Operator [GBY_353] + <-Reducer 2 [SIMPLE_EDGE] + 
SHUFFLE [RS_99] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_262] (rows=525327388 width=114) + Conds:RS_285._col0=RS_288._col0(Inner),Output:["_col1","_col2","_col4"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_285] + PartitionCols:_col0 + Select Operator [SEL_284] (rows=525327388 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_283] (rows=525327388 width=114) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_288] + PartitionCols:_col0 + Select Operator [SEL_287] (rows=73049 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_286] (rows=73049 width=8) + predicate:d_date_sk is not null + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git ql/src/test/results/clientpositive/perf/tez/query58.q.out ql/src/test/results/clientpositive/perf/tez/query58.q.out index bff619a052..3ab6fa2048 100644 --- ql/src/test/results/clientpositive/perf/tez/query58.q.out +++ ql/src/test/results/clientpositive/perf/tez/query58.q.out @@ -181,7 +181,7 @@ Stage-0 Select Operator [SEL_162] (rows=1 width=884) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Filter Operator [FIL_158] (rows=1 width=1108) - predicate:(_col1 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col10 AND _col11 and _col9 BETWEEN _col2 AND _col3 and _col9 BETWEEN _col6 AND _col7) + predicate:(_col1 BETWEEN _col10 AND _col11 and _col5 BETWEEN _col10 AND _col11 and _col9 BETWEEN _col3 AND _col7 and _col9 BETWEEN _col6 AND _col2) Merge Join Operator [MERGEJOIN_419] (rows=1 width=1108) Conds:RS_155._col0=RS_467._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col9","_col10","_col11"] <-Reducer 14 
[ONE_TO_ONE_EDGE] vectorized @@ -299,7 +299,7 @@ Stage-0 FORWARD [RS_155] PartitionCols:_col0 Filter Operator [FIL_153] (rows=1 width=772) - predicate:(_col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col2 AND _col3) + predicate:(_col1 BETWEEN _col6 AND _col7 and _col5 BETWEEN _col3 AND _col2) Merge Join Operator [MERGEJOIN_418] (rows=68 width=772) Conds:RS_451._col0=RS_459._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7"] <-Reducer 11 [ONE_TO_ONE_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query64.q.out ql/src/test/results/clientpositive/perf/tez/query64.q.out index 0010c469c7..14bc6f0837 100644 --- ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -265,488 +265,518 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 34 <- Reducer 37 (BROADCAST_EDGE) -Map 40 <- Reducer 37 (BROADCAST_EDGE) -Map 50 <- Reducer 39 (BROADCAST_EDGE) -Map 51 <- Reducer 39 (BROADCAST_EDGE) -Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 29 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 13 <- Map 49 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 42 (ONE_TO_ONE_EDGE) -Reducer 18 <- Map 46 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 20 <- Map 47 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 33 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 48 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 23 <- Map 15 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) -Reducer 24 <- Reducer 23 (SIMPLE_EDGE), Reducer 45 (ONE_TO_ONE_EDGE) -Reducer 25 <- Map 46 
(SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 27 <- Map 47 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) -Reducer 28 <- Map 33 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Map 48 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 31 <- Map 30 (SIMPLE_EDGE), Map 32 (SIMPLE_EDGE) +Map 38 <- Reducer 25 (BROADCAST_EDGE) +Map 44 <- Reducer 41 (BROADCAST_EDGE) +Map 54 <- Reducer 33 (BROADCAST_EDGE) +Map 55 <- Reducer 43 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 32 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 14 <- Map 53 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 36 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Reducer 39 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 46 (ONE_TO_ONE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 20 <- Map 50 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 34 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 51 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 37 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 52 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 17 (SIMPLE_EDGE), Reducer 42 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 49 (ONE_TO_ONE_EDGE) +Reducer 28 <- Map 50 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 34 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 51 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 37 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 52 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- 
Map 17 (CUSTOM_SIMPLE_EDGE) Reducer 35 <- Map 34 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) -Reducer 37 <- Map 36 (CUSTOM_SIMPLE_EDGE) -Reducer 38 <- Map 36 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE) -Reducer 39 <- Map 36 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) -Reducer 42 <- Reducer 41 (SIMPLE_EDGE) -Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) -Reducer 45 <- Reducer 44 (SIMPLE_EDGE) -Reducer 5 <- Map 33 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 49 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 49 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) +Reducer 41 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 42 <- Map 40 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) +Reducer 43 <- Map 40 (CUSTOM_SIMPLE_EDGE) +Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE) +Reducer 46 <- Reducer 45 (SIMPLE_EDGE) +Reducer 48 <- Map 47 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) +Reducer 49 <- Reducer 48 (SIMPLE_EDGE) +Reducer 5 <- Map 37 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 53 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 24 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 53 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 36 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 11 vectorized - File Output Operator [FS_1176] - Select Operator [SEL_1175] (rows=2169965329 width=1702) + Reducer 12 vectorized + File Output Operator [FS_1208] + Select Operator [SEL_1207] (rows=98871277768 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - 
<-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_257] - Select Operator [SEL_256] (rows=2169965329 width=1694) + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_255] + Select Operator [SEL_254] (rows=98871277768 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_255] (rows=2169965329 width=1694) + Filter Operator [FIL_253] (rows=98871277768 width=1694) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_1087] (rows=6509895988 width=1694) - Conds:RS_1158._col2, _col1, _col3=RS_1174._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] - <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1174] - PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_1173] (rows=2299138 width=525) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_1172] (rows=2299138 width=1362) + Merge Join Operator [MERGEJOIN_1107] (rows=296613833305 width=1694) + Conds:RS_1189._col2, _col1, _col3=RS_1206._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + <-Reducer 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1189] + PartitionCols:_col2, _col1, _col3 + Select Operator [SEL_1188] (rows=20709988 width=1354) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Group By Operator [GBY_1187] (rows=20709988 width=1362) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_249] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_122] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_248] (rows=2299138 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col47 - Select Operator [SEL_247] (rows=2331650 width=1292) - Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col47"] - Filter Operator [FIL_246] (rows=2331650 width=1292) - predicate:(_col51 <> _col19) - Merge Join Operator [MERGEJOIN_1086] (rows=2331650 width=1292) - Conds:RS_243._col37=RS_1116._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col47","_col51"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1116] - PartitionCols:_col0 - Select Operator [SEL_1113] (rows=1861800 width=89) - Output:["_col0","_col1"] - Filter Operator [FIL_1112] (rows=1861800 width=89) - predicate:cd_demo_sk is not null - TableScan [TS_96] (rows=1861800 width=89) - 
default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_243] - PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1085] (rows=2299138 width=1205) - Conds:RS_240._col0=RS_241._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col47"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_240] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1056] (rows=70357394 width=458) - Conds:RS_111._col1=RS_1115._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1115] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1113] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_111] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1055] (rows=69376329 width=376) - Conds:RS_108._col3=RS_1109._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1109] - PartitionCols:_col0 - Select Operator [SEL_1108] (rows=40000000 width=365) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1107] (rows=40000000 width=365) - predicate:ca_address_sk is not null - TableScan [TS_19] (rows=40000000 width=365) - default@customer_address,ad2,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_108] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_1054] (rows=69376329 width=19) - Conds:RS_105._col2=RS_106._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_106] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1053] (rows=7200 width=4) - Conds:RS_1103._col1=RS_1106._col0(Inner),Output:["_col0"] - <-Map 30 [SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_1103] - PartitionCols:_col1 - Select Operator [SEL_1102] (rows=7200 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_1101] (rows=7200 width=8) - predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) - TableScan [TS_9] (rows=7200 width=8) - default@household_demographics,hd2,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1106] - PartitionCols:_col0 - Select Operator [SEL_1105] (rows=20 width=4) - Output:["_col0"] - Filter Operator [FIL_1104] (rows=20 width=4) - predicate:ib_income_band_sk is not null - TableScan [TS_12] (rows=20 width=4) - default@income_band,ib2,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_105] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_1052] (rows=69376329 width=23) - Conds:RS_102._col4=RS_1098._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1098] - PartitionCols:_col0 - Select Operator [SEL_1094] (rows=73049 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_1091] (rows=73049 width=8) - predicate:d_date_sk is not null - TableScan [TS_3] (rows=73049 width=8) - default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_102] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1051] (rows=69376329 width=23) - Conds:RS_1090._col5=RS_1097._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1097] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1094] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1090] - PartitionCols:_col5 - Select Operator [SEL_1089] (rows=69376329 width=23) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1088] (rows=69376329 width=23) - predicate:(c_current_addr_sk is not null and 
c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) - TableScan [TS_0] (rows=80000000 width=23) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] - <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_241] - PartitionCols:_col16 - Select Operator [SEL_221] (rows=2651207 width=784) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col27"] - Merge Join Operator [MERGEJOIN_1084] (rows=2651207 width=784) - Conds:RS_218._col5, _col12=RS_1155._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col17","_col21","_col22","_col24","_col25","_col26","_col27"] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1155] - PartitionCols:_col0, _col1 - Select Operator [SEL_1153] (rows=57591150 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_1152] (rows=57591150 width=8) - predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_77] (rows=57591150 width=8) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_218] - PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1083] (rows=1608052 width=657) - Conds:RS_215._col9=RS_1111._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col17","_col21","_col22","_col24","_col25","_col26","_col27"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1111] + Group By Operator [GBY_121] (rows=20709988 width=1362) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col42)","sum(_col43)","sum(_col44)"],keys:_col28, _col45, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col46 + Merge Join Operator [MERGEJOIN_1088] (rows=21002852 width=1122) + Conds:RS_117._col31=RS_1135._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col42","_col43","_col44","_col45","_col46"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1135] + PartitionCols:_col0 + Select Operator [SEL_1133] (rows=20 width=4) + Output:["_col0"] + Filter Operator [FIL_1132] (rows=20 width=4) + predicate:ib_income_band_sk is not null + TableScan [TS_12] (rows=20 width=4) + default@income_band,ib2,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_117] + PartitionCols:_col31 + Filter Operator [FIL_116] (rows=21002852 width=1296) + predicate:(_col50 <> _col19) + Merge Join Operator [MERGEJOIN_1087] (rows=21002852 width=1296) + Conds:RS_113._col36=RS_1144._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col42","_col43","_col44","_col45","_col46","_col50"] + <-Map 53 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1144] + PartitionCols:_col0 + Select Operator [SEL_1143] (rows=1861800 width=89) + Output:["_col0","_col1"] + Filter Operator [FIL_1142] (rows=1861800 width=89) + predicate:cd_demo_sk is not null + TableScan [TS_89] (rows=1861800 width=89) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_113] + PartitionCols:_col36 + Merge Join Operator [MERGEJOIN_1086] (rows=20709988 width=1209) + 
Conds:RS_110._col0=RS_111._col15(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col36","_col42","_col43","_col44","_col45","_col46"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_110] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_1076] (rows=70357394 width=458) + Conds:RS_107._col1=RS_1145._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] + <-Map 53 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1145] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1143] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_107] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_1075] (rows=69376329 width=376) + Conds:RS_104._col3=RS_1139._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1139] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1108] - <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_215] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1082] (rows=1608052 width=296) - Conds:RS_212._col10=RS_1151._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col17","_col21","_col22"] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1151] + Select Operator [SEL_1138] (rows=40000000 width=365) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_1137] (rows=40000000 width=365) + predicate:ca_address_sk is not null + TableScan [TS_19] (rows=40000000 width=365) + default@customer_address,ad2,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_1074] (rows=69376329 width=19) + Conds:RS_101._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE 
[RS_101] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_1072] (rows=69376329 width=23) + Conds:RS_98._col4=RS_1118._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1118] + PartitionCols:_col0 + Select Operator [SEL_1114] (rows=73049 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_1111] (rows=73049 width=8) + predicate:d_date_sk is not null + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_98] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_1071] (rows=69376329 width=23) + Conds:RS_1110._col5=RS_1117._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1117] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1114] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1110] + PartitionCols:_col5 + Select Operator [SEL_1109] (rows=69376329 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_1108] (rows=69376329 width=23) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) + TableScan [TS_0] (rows=80000000 width=23) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_102] PartitionCols:_col0 - Select Operator [SEL_1149] (rows=1704 width=181) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1148] (rows=1704 width=181) - predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) - TableScan [TS_71] (rows=1704 width=181) - 
default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 26 [SIMPLE_EDGE] - SHUFFLE [RS_212] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1081] (rows=1608052 width=119) - Conds:RS_209._col0=RS_210._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_209] + Merge Join Operator [MERGEJOIN_1073] (rows=7200 width=4) + Conds:RS_1129._col1=RS_1134._col0(Inner),Output:["_col0"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1129] + PartitionCols:_col1 + Select Operator [SEL_1128] (rows=7200 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_1127] (rows=7200 width=8) + predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) + TableScan [TS_9] (rows=7200 width=8) + default@household_demographics,hd2,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1134] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_210] - PartitionCols:_col5 - Select Operator [SEL_196] (rows=1608052 width=119) - Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_1080] (rows=1608052 width=119) - Conds:RS_193._col7=RS_1147._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1147] + Please refer to the previous Select Operator [SEL_1133] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_111] + PartitionCols:_col15 + Select Operator [SEL_88] (rows=23881330 width=788) + Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col11","_col15","_col16","_col22","_col23","_col24","_col25","_col26"] + Merge Join Operator [MERGEJOIN_1085] (rows=23881330 width=788) + 
Conds:RS_85._col1, _col8=RS_1185._col0, _col1(Inner),Output:["_col2","_col3","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1185] + PartitionCols:_col0, _col1 + Select Operator [SEL_1184] (rows=57591150 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_1183] (rows=57591150 width=8) + predicate:(sr_item_sk is not null and sr_ticket_number is not null) + TableScan [TS_61] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_85] + PartitionCols:_col1, _col8 + Merge Join Operator [MERGEJOIN_1084] (rows=14484878 width=661) + Conds:RS_82._col5=RS_1140._col0(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1140] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1138] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_82] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1083] (rows=14484878 width=300) + Conds:RS_79._col6=RS_1181._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1181] + PartitionCols:_col0 + Select Operator [SEL_1180] (rows=1704 width=181) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1179] (rows=1704 width=181) + predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) + TableScan [TS_55] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_79] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_1082] (rows=14484878 width=123) + 
Conds:RS_76._col4=RS_1130._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col18"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1130] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1128] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_76] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_1081] (rows=14484878 width=119) + Conds:RS_73._col7=RS_1177._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1177] PartitionCols:_col0 - Select Operator [SEL_1145] (rows=2300 width=4) + Select Operator [SEL_1176] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_1144] (rows=2300 width=4) + Filter Operator [FIL_1175] (rows=2300 width=4) predicate:p_promo_sk is not null - TableScan [TS_55] (rows=2300 width=4) + TableScan [TS_49] (rows=2300 width=4) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_193] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_73] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1079] (rows=1608052 width=119) - Conds:RS_190._col1=RS_1171._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_190] + Merge Join Operator [MERGEJOIN_1080] (rows=14484878 width=119) + Conds:RS_70._col1=RS_1174._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_70] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1077] (rows=1608052 width=119) - Conds:RS_187._col0=RS_1100._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE 
[RS_1100] + Merge Join Operator [MERGEJOIN_1078] (rows=14484878 width=119) + Conds:RS_67._col0=RS_1119._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1119] PartitionCols:_col0 - Select Operator [SEL_1096] (rows=652 width=4) + Select Operator [SEL_1115] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_1093] (rows=652 width=8) - predicate:((d_year = 2001) and d_date_sk is not null) + Filter Operator [FIL_1112] (rows=652 width=8) + predicate:((d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] - <-Reducer 38 [SIMPLE_EDGE] - SHUFFLE [RS_187] + <-Reducer 39 [SIMPLE_EDGE] + SHUFFLE [RS_67] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1076] (rows=4503592 width=119) - Conds:RS_1164._col1=RS_1121._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 36 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1121] + Merge Join Operator [MERGEJOIN_1077] (rows=40567099 width=314) + Conds:RS_1151._col1=RS_1154._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 40 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1154] PartitionCols:_col0 - Select Operator [SEL_1118] (rows=518 width=111) + Select Operator [SEL_1153] (rows=4666 width=111) Output:["_col0","_col1"] - Filter Operator [FIL_1117] (rows=518 width=312) - predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) - TableScan [TS_34] (rows=462000 width=311) + Filter Operator [FIL_1152] (rows=4666 width=311) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and 
i_current_price BETWEEN 36 AND 45 and i_item_sk is not null) + TableScan [TS_28] (rows=462000 width=311) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] - <-Map 50 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1164] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1151] PartitionCols:_col1 - Select Operator [SEL_1163] (rows=417313408 width=355) + Select Operator [SEL_1150] (rows=417313408 width=355) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1162] (rows=417313408 width=355) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_185_item_i_item_sk_min) AND DynamicValue(RS_185_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_185_item_i_item_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_157] (rows=575995635 width=355) + Filter Operator [FIL_1149] (rows=417313408 width=355) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_68_d1_d_date_sk_min) AND DynamicValue(RS_68_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_68_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_25] (rows=575995635 width=355) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 39 [BROADCAST_EDGE] vectorized - 
BROADCAST [RS_1160] - Group By Operator [GBY_1159] (rows=1 width=12) + <-Reducer 25 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1148] + Group By Operator [GBY_1147] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 36 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1126] - Group By Operator [GBY_1124] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1125] + Group By Operator [GBY_1123] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1122] (rows=518 width=4) + Select Operator [SEL_1120] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1118] - <-Reducer 45 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_1171] + Please refer to the previous Select Operator [SEL_1115] + <-Reducer 46 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1174] PartitionCols:_col0 - Select Operator [SEL_1170] (rows=13257 width=4) + Select Operator [SEL_1173] (rows=13257 width=4) Output:["_col0"] - Filter Operator [FIL_1169] (rows=13257 width=228) + Filter Operator [FIL_1172] (rows=13257 width=228) predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1168] (rows=39773 width=228) + Group By Operator [GBY_1171] (rows=39773 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 44 [SIMPLE_EDGE] - SHUFFLE [RS_177] + <-Reducer 45 [SIMPLE_EDGE] + SHUFFLE [RS_45] PartitionCols:_col0 - Group By Operator [GBY_176] (rows=6482999 width=228) + Group By Operator [GBY_44] (rows=6482999 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 - Merge Join Operator [MERGEJOIN_1078] (rows=183085709 width=227) - Conds:RS_1167._col0, _col1=RS_1139._col0, 
_col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1139] + Merge Join Operator [MERGEJOIN_1079] (rows=183085709 width=227) + Conds:RS_1166._col0, _col1=RS_1169._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1169] PartitionCols:_col0, _col1 - Select Operator [SEL_1137] (rows=28798881 width=120) + Select Operator [SEL_1168] (rows=28798881 width=120) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1136] (rows=28798881 width=337) + Filter Operator [FIL_1167] (rows=28798881 width=337) predicate:(cr_item_sk is not null and cr_order_number is not null) - TableScan [TS_43] (rows=28798881 width=337) + TableScan [TS_37] (rows=28798881 width=337) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 51 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1167] + <-Map 44 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1166] PartitionCols:_col0, _col1 - Select Operator [SEL_1166] (rows=287989836 width=119) + Select Operator [SEL_1165] (rows=287989836 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1165] (rows=287989836 width=119) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_185_item_i_item_sk_min) AND DynamicValue(RS_185_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_185_item_i_item_sk_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_166] (rows=287989836 width=119) + Filter Operator [FIL_1164] (rows=287989836 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_65_item_i_item_sk_min) AND DynamicValue(RS_65_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_65_item_i_item_sk_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_34] (rows=287989836 width=119) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 39 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1161] - Please refer to the previous Group By Operator [GBY_1159] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1158] - PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_1157] (rows=2299138 width=1354) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Group By Operator [GBY_1156] (rows=2299138 width=1362) + <-Reducer 41 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1163] + Group By Operator [GBY_1162] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1160] + Group By Operator [GBY_1158] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1155] (rows=4666 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1153] + <-Reducer 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1206] + PartitionCols:_col1, _col0, _col2 + Select Operator [SEL_1205] (rows=20709988 width=525) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_1204] (rows=20709988 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_123] + <-Reducer 15 [SIMPLE_EDGE] + 
SHUFFLE [RS_247] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_122] (rows=2299138 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col43)","sum(_col44)","sum(_col45)"],keys:_col28, _col46, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col47 - Select Operator [SEL_121] (rows=2331650 width=1292) - Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col47"] - Filter Operator [FIL_120] (rows=2331650 width=1292) - predicate:(_col51 <> _col19) - Merge Join Operator [MERGEJOIN_1068] (rows=2331650 width=1292) - Conds:RS_117._col37=RS_1114._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col47","_col51"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1114] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1113] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_117] - PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1067] (rows=2299138 width=1205) - Conds:RS_114._col0=RS_115._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col47"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_114] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1056] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_115] - PartitionCols:_col16 - Select Operator [SEL_95] (rows=2651207 width=784) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col27"] - Merge 
Join Operator [MERGEJOIN_1066] (rows=2651207 width=784) - Conds:RS_92._col5, _col12=RS_1154._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col17","_col21","_col22","_col24","_col25","_col26","_col27"] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1154] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1153] - <-Reducer 21 [SIMPLE_EDGE] - SHUFFLE [RS_92] - PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1065] (rows=1608052 width=657) - Conds:RS_89._col9=RS_1110._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col17","_col21","_col22","_col24","_col25","_col26","_col27"] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1110] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1108] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1064] (rows=1608052 width=296) - Conds:RS_86._col10=RS_1150._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col17","_col21","_col22"] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1150] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1149] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_86] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1063] (rows=1608052 width=119) - Conds:RS_83._col0=RS_84._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_83] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1053] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_84] - PartitionCols:_col5 - Select Operator [SEL_70] (rows=1608052 width=119) - Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_1062] (rows=1608052 width=119) - 
Conds:RS_67._col7=RS_1146._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1146] + Group By Operator [GBY_246] (rows=20709988 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col42)","sum(_col43)","sum(_col44)"],keys:_col28, _col45, _col29, _col7, _col9, _col14, _col15, _col16, _col17, _col23, _col24, _col25, _col26, _col46 + Merge Join Operator [MERGEJOIN_1106] (rows=21002852 width=1122) + Conds:RS_242._col31=RS_1136._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col42","_col43","_col44","_col45","_col46"] + <-Map 36 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1136] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1133] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_242] + PartitionCols:_col31 + Filter Operator [FIL_241] (rows=21002852 width=1296) + predicate:(_col50 <> _col19) + Merge Join Operator [MERGEJOIN_1105] (rows=21002852 width=1296) + Conds:RS_238._col36=RS_1146._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col42","_col43","_col44","_col45","_col46","_col50"] + <-Map 53 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1146] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1143] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_238] + PartitionCols:_col36 + Merge Join Operator [MERGEJOIN_1104] (rows=20709988 width=1209) + Conds:RS_235._col0=RS_236._col15(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col31","_col36","_col42","_col43","_col44","_col45","_col46"] + <-Reducer 6 [SIMPLE_EDGE] + 
SHUFFLE [RS_235] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_1076] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_236] + PartitionCols:_col15 + Select Operator [SEL_213] (rows=23881330 width=788) + Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col11","_col15","_col16","_col22","_col23","_col24","_col25","_col26"] + Merge Join Operator [MERGEJOIN_1103] (rows=23881330 width=788) + Conds:RS_210._col1, _col8=RS_1186._col0, _col1(Inner),Output:["_col2","_col3","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1186] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1184] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_210] + PartitionCols:_col1, _col8 + Merge Join Operator [MERGEJOIN_1102] (rows=14484878 width=661) + Conds:RS_207._col5=RS_1141._col0(Inner),Output:["_col1","_col2","_col3","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21","_col23","_col24","_col25","_col26"] + <-Map 37 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1141] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1138] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_207] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_1101] (rows=14484878 width=300) + Conds:RS_204._col6=RS_1182._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col18","_col20","_col21"] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1182] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1180] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_204] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_1100] (rows=14484878 width=123) + Conds:RS_201._col4=RS_1131._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13","_col18"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE 
[RS_1131] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1128] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_201] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_1099] (rows=14484878 width=119) + Conds:RS_198._col7=RS_1178._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1178] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1145] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_67] + Please refer to the previous Select Operator [SEL_1176] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_198] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1061] (rows=1608052 width=119) - Conds:RS_64._col1=RS_1143._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_64] + Merge Join Operator [MERGEJOIN_1098] (rows=14484878 width=119) + Conds:RS_195._col1=RS_1203._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_195] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1059] (rows=1608052 width=119) - Conds:RS_61._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1099] + Merge Join Operator [MERGEJOIN_1096] (rows=14484878 width=119) + Conds:RS_192._col0=RS_1121._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1121] PartitionCols:_col0 - Select Operator [SEL_1095] (rows=652 width=4) + Select Operator [SEL_1116] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_1092] (rows=652 width=8) - 
predicate:((d_year = 2000) and d_date_sk is not null) + Filter Operator [FIL_1113] (rows=652 width=8) + predicate:((d_year = 2001) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] - <-Reducer 35 [SIMPLE_EDGE] - SHUFFLE [RS_61] + <-Reducer 42 [SIMPLE_EDGE] + SHUFFLE [RS_192] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1058] (rows=4503592 width=119) - Conds:RS_1132._col1=RS_1119._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 36 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1119] + Merge Join Operator [MERGEJOIN_1095] (rows=40567099 width=314) + Conds:RS_1194._col1=RS_1156._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 40 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1156] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1118] - <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1132] + Please refer to the previous Select Operator [SEL_1153] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1194] PartitionCols:_col1 - Select Operator [SEL_1131] (rows=417313408 width=355) + Select Operator [SEL_1193] (rows=417313408 width=355) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1130] (rows=417313408 width=355) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_59_item_i_item_sk_min) AND DynamicValue(RS_59_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_59_item_i_item_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_31] (rows=575995635 width=355) + Filter Operator 
[FIL_1192] (rows=417313408 width=355) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_193_d1_d_date_sk_min) AND DynamicValue(RS_193_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_193_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_150] (rows=575995635 width=355) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1128] - Group By Operator [GBY_1127] (rows=1 width=12) + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1191] + Group By Operator [GBY_1190] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 36 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1125] - Group By Operator [GBY_1123] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1126] + Group By Operator [GBY_1124] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1120] (rows=518 width=4) + Select Operator [SEL_1122] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1118] - <-Reducer 42 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_1143] + Please refer to the previous Select Operator [SEL_1116] + <-Reducer 49 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_1203] PartitionCols:_col0 - Select Operator [SEL_1142] (rows=13257 width=4) + Select Operator 
[SEL_1202] (rows=13257 width=4) Output:["_col0"] - Filter Operator [FIL_1141] (rows=13257 width=228) + Filter Operator [FIL_1201] (rows=13257 width=228) predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1140] (rows=39773 width=228) + Group By Operator [GBY_1200] (rows=39773 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 41 [SIMPLE_EDGE] - SHUFFLE [RS_51] + <-Reducer 48 [SIMPLE_EDGE] + SHUFFLE [RS_170] PartitionCols:_col0 - Group By Operator [GBY_50] (rows=6482999 width=228) + Group By Operator [GBY_169] (rows=6482999 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 - Merge Join Operator [MERGEJOIN_1060] (rows=183085709 width=227) - Conds:RS_1135._col0, _col1=RS_1138._col0, _col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1138] + Merge Join Operator [MERGEJOIN_1097] (rows=183085709 width=227) + Conds:RS_1199._col0, _col1=RS_1170._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1170] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1137] - <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1135] + Please refer to the previous Select Operator [SEL_1168] + <-Map 55 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1199] PartitionCols:_col0, _col1 - Select Operator [SEL_1134] (rows=287989836 width=119) + Select Operator [SEL_1198] (rows=287989836 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1133] (rows=287989836 width=119) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_59_item_i_item_sk_min) AND DynamicValue(RS_59_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_59_item_i_item_sk_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_40] (rows=287989836 width=119) + Filter Operator [FIL_1197] (rows=287989836 width=119) + predicate:((cs_item_sk 
BETWEEN DynamicValue(RS_190_item_i_item_sk_min) AND DynamicValue(RS_190_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_190_item_i_item_sk_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_159] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1129] - Please refer to the previous Group By Operator [GBY_1127] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1196] + Group By Operator [GBY_1195] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 40 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1161] + Group By Operator [GBY_1159] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1157] (rows=4666 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1153] diff --git ql/src/test/results/clientpositive/perf/tez/query85.q.out ql/src/test/results/clientpositive/perf/tez/query85.q.out index 1ada3945fd..5b8ad0ba57 100644 --- ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,155 +183,153 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Map 11 <- Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 17 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 16 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 17 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 10 vectorized - File Output Operator [FS_240] - Limit [LIM_239] (rows=7 width=832) + Reducer 6 vectorized + File Output Operator [FS_237] + Limit [LIM_236] (rows=72 width=832) Number of rows:100 - Select Operator [SEL_238] (rows=7 width=832) + Select Operator [SEL_235] (rows=72 width=832) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] - Select Operator [SEL_236] (rows=7 width=832) + <-Reducer 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_234] + Select Operator [SEL_233] (rows=72 width=832) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_235] (rows=7 width=353) + Group By Operator [GBY_232] (rows=72 width=353) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_49] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_50] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=7 width=353) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col25 - Select Operator [SEL_47] (rows=16740 width=136) - Output:["_col6","_col7","_col12","_col25"] - Filter Operator [FIL_46] (rows=16740 width=136) - predicate:((_col33 and _col34 and _col16) or (_col35 and _col36 and _col17) or (_col37 and _col38 and _col18)) - Merge Join Operator [MERGEJOIN_206] (rows=44640 width=136) - Conds:RS_43._col1, _col20, _col21=RS_224._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col16","_col17","_col18","_col25","_col33","_col34","_col35","_col36","_col37","_col38"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_222] (rows=265971 width=207) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_221] (rows=265971 width=183) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_21] (rows=1861800 width=183) - default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col1, _col20, _col21 - Filter Operator [FIL_42] (rows=44640 width=315) - predicate:((_col27 and _col13) or (_col28 and _col14) or (_col29 and _col15)) - Merge Join Operator [MERGEJOIN_205] (rows=59520 width=315) - 
Conds:RS_39._col2=RS_234._col0(Inner),Output:["_col1","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col20","_col21","_col25","_col27","_col28","_col29"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + Group By Operator [GBY_49] (rows=288 width=353) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","count(_col4)","sum(_col28)","count(_col28)","sum(_col27)","count(_col27)"],keys:_col37 + Merge Join Operator [MERGEJOIN_203] (rows=2912400 width=313) + Conds:RS_45._col2=RS_231._col0(Inner),Output:["_col4","_col27","_col28","_col37"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + PartitionCols:_col0 + Select Operator [SEL_230] (rows=4602 width=4) + Output:["_col0"] + Filter Operator [FIL_229] (rows=4602 width=4) + predicate:wp_web_page_sk is not null + TableScan [TS_34] (rows=4602 width=4) + default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col2 + Filter Operator [FIL_43] (rows=2912400 width=377) + predicate:(((_col15 and _col16 and _col8) or (_col17 and _col18 and _col9) or (_col19 and _col20 and _col10)) and ((_col30 and _col5) or (_col31 and _col6) or (_col32 and _col7))) + Merge Join Operator [MERGEJOIN_202] (rows=10355208 width=377) + Conds:RS_40._col1, _col3=RS_41._col9, _col14(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col15","_col16","_col17","_col18","_col19","_col20","_col27","_col28","_col30","_col31","_col32","_col37"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col9, _col14 + Select Operator [SEL_33] (rows=1056644 width=250) + Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col14","_col15","_col16","_col18","_col19","_col20","_col25"] + Merge Join Operator [MERGEJOIN_201] (rows=1056644 width=250) + 
Conds:RS_30._col4=RS_228._col0(Inner),Output:["_col0","_col5","_col6","_col7","_col9","_col10","_col11","_col18","_col19","_col20","_col21","_col22","_col23","_col25"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col0 + Select Operator [SEL_227] (rows=72 width=101) + Output:["_col0","_col1"] + Filter Operator [FIL_226] (rows=72 width=101) + predicate:r_reason_sk is not null + TableScan [TS_18] (rows=72 width=101) + default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_200] (rows=1056644 width=155) + Conds:RS_27._col1, _col13, _col14=RS_224._col0, _col1, _col2(Inner),Output:["_col0","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col18","_col19","_col20","_col21","_col22","_col23"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_222] (rows=265971 width=207) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_221] (rows=265971 width=183) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_15] (rows=1861800 width=183) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1, _col13, _col14 + Merge Join Operator [MERGEJOIN_199] (rows=1056644 width=312) + Conds:RS_24._col3=RS_225._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col13","_col14"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_223] (rows=265971 width=183) + Output:["_col0","_col1","_col2"] + Please refer to the previous Filter Operator [FIL_221] + <-Reducer 10 
[SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_198] (rows=1056644 width=135) + Conds:RS_217._col2=RS_220._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + PartitionCols:_col0 + Select Operator [SEL_219] (rows=3529412 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_218] (rows=3529412 width=187) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_9] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col2 + Select Operator [SEL_216] (rows=11975292 width=237) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_215] (rows=11975292 width=237) + predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) + TableScan [TS_6] (rows=14398467 width=237) + default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_197] (rows=51392014 width=39) + Conds:RS_214._col0=RS_206._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_206] PartitionCols:_col0 - Select Operator [SEL_233] (rows=3529412 width=16) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_232] 
(rows=3529412 width=187) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_18] (rows=40000000 width=187) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_204] (rows=674551 width=350) - Conds:RS_36._col4=RS_231._col0(Inner),Output:["_col1","_col2","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col20","_col21","_col25"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0 - Select Operator [SEL_230] (rows=72 width=101) - Output:["_col0","_col1"] - Filter Operator [FIL_229] (rows=72 width=101) - predicate:r_reason_sk is not null - TableScan [TS_15] (rows=72 width=101) - default@reason,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_sk","r_reason_desc"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_203] (rows=674551 width=254) - Conds:RS_33._col10=RS_228._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col20","_col21"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - PartitionCols:_col0 - Select Operator [SEL_227] (rows=4602 width=4) - Output:["_col0"] - Filter Operator [FIL_226] (rows=4602 width=4) - predicate:wp_web_page_sk is not null - TableScan [TS_12] (rows=4602 width=4) - default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_202] (rows=674551 width=258) - Conds:RS_30._col8=RS_212._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col10","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col20","_col21"] - <-Map 12 [SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_210] (rows=652 width=8) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_9] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_201] (rows=1889180 width=383) - Conds:RS_27._col3=RS_225._col0(Inner),Output:["_col1","_col2","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col20","_col21"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - PartitionCols:_col0 - Select Operator [SEL_223] (rows=265971 width=183) - Output:["_col0","_col1","_col2"] - Please refer to the previous Filter Operator [FIL_221] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_200] (rows=13039884 width=266) - Conds:RS_209._col0, _col5=RS_220._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0, _col5 - Select Operator [SEL_208] (rows=11975292 width=237) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_207] (rows=11975292 width=237) - predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_0] (rows=14398467 width=237) - default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - 
PartitionCols:_col1, _col3 - Select Operator [SEL_219] (rows=15992347 width=43) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_218] (rows=15992347 width=243) - predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_3] (rows=144002668 width=243) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] - Group By Operator [GBY_214] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_211] + Select Operator [SEL_205] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_204] (rows=652 width=8) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_214] + 
PartitionCols:_col0 + Select Operator [SEL_213] (rows=143931136 width=43) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_212] (rows=143931136 width=243) + predicate:(((ws_net_profit >= 100) or (ws_net_profit <= 200) or ws_net_profit is not null or (ws_net_profit <= 300) or (ws_net_profit <= 250)) and ((ws_sales_price >= 100) or (ws_sales_price <= 150) or ws_sales_price is not null or (ws_sales_price <= 200)) and (ws_sold_date_sk BETWEEN DynamicValue(RS_38_date_dim_d_date_sk_min) AND DynamicValue(RS_38_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_38_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_0] (rows=144002668 width=243) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_209] + Group By Operator [GBY_208] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_207] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_205] diff --git ql/src/test/results/clientpositive/rand_partitionpruner3.q.out ql/src/test/results/clientpositive/rand_partitionpruner3.q.out index 008b761e9f..5e19d87f1e 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner3.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner3.q.out @@ -8,9 +8,6 @@ POSTHOOK: 
type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` -FROM `default`.`srcpart` -WHERE RAND(1) < 0.1 AND `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2' STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -25,7 +22,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D) and (rand(1) < 0.1D)) (type: boolean) + predicate: ((rand(1) < 0.1D) and UDFToDouble(key) BETWEEN 10.0D AND 50.0D) (type: boolean) Statistics: Num rows: 18 Data size: 6516 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) @@ -140,9 +137,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` -FROM `default`.`srcpart` -WHERE `ds` = '2008-04-08' AND `key` <= 50 AND `key` >= 10 AND `hr` LIKE '%2' STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -153,12 +147,12 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean) + filterExpr: ((ds = '2008-04-08') and UDFToDouble(key) BETWEEN 10.0D AND 50.0D and (hr like '%2')) (type: boolean) Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((UDFToDouble(key) <= 50.0D) and (UDFToDouble(key) >= 10.0D)) (type: boolean) + predicate: UDFToDouble(key) BETWEEN 10.0D AND 50.0D (type: boolean) Statistics: Num rows: 55 Data size: 19910 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) diff --git ql/src/test/results/clientpositive/select_unquote_or.q.out ql/src/test/results/clientpositive/select_unquote_or.q.out index 4b9ee0cf1c..bb4df3926e 100644 --- ql/src/test/results/clientpositive/select_unquote_or.q.out +++ ql/src/test/results/clientpositive/select_unquote_or.q.out @@ -61,18 +61,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: npe_test - filterExpr: ((UDFToDouble(ds) > 1970.0D) or (UDFToDouble(ds) < 1985.0D)) (type: boolean) + filterExpr: UDFToDouble(ds) NOT BETWEEN 1985.0D AND 1970.0D (type: boolean) Statistics: Num rows: 498 Data size: 180276 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((UDFToDouble(ds) < 1985.0D) or (UDFToDouble(ds) > 1970.0D)) (type: boolean) - Statistics: Num rows: 332 Data size: 120184 Basic stats: COMPLETE Column stats: COMPLETE + predicate: UDFToDouble(ds) NOT BETWEEN 1985.0D AND 1970.0D (type: boolean) + Statistics: Num rows: 443 Data size: 160366 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 332 Data size: 120184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 443 Data size: 160366 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 332 Data size: 120184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 443 Data size: 160366 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/smb_mapjoin_47.q.out ql/src/test/results/clientpositive/smb_mapjoin_47.q.out index 57a543cda2..20a7348f64 100644 --- 
ql/src/test/results/clientpositive/smb_mapjoin_47.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_47.q.out @@ -549,14 +549,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col0 + _col3) >= 100)} {((_col0 + _col3) <= 102)} - Statistics: Num rows: 2 Data size: 382 Basic stats: COMPLETE Column stats: COMPLETE + residual filter predicates: {(_col0 + _col3) BETWEEN 100 AND 102} + Statistics: Num rows: 12 Data size: 2292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 2 Data size: 382 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 382 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/spark/join34.q.out ql/src/test/results/clientpositive/spark/join34.q.out index 7ec6094304..652f1af0d0 100644 --- ql/src/test/results/clientpositive/spark/join34.q.out +++ ql/src/test/results/clientpositive/spark/join34.q.out @@ -32,17 +32,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n1 -OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`value` AS `value1` -FROM (SELECT `key`, `value` -FROM `default`.`src` -WHERE `key` < 20 -UNION ALL -SELECT `key`, `value` -FROM `default`.`src` -WHERE `key` > 100) AS `t3` -INNER JOIN (SELECT `key`, `value` -FROM `default`.`src1` -WHERE (`key` < 20 OR `key` > 100) AND `key` IS NOT NULL) AS `t5` ON `t3`.`key` = `t5`.`key` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -59,23 +48,23 @@ STAGE PLANS: Map 
Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 296 Data size: 3144 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -137,23 +126,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data 
size: 1572 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 332 Data size: 3526 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 296 Data size: 3144 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: string) auto parallelism: false @@ -215,23 +204,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) - Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 175 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 175 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 23 Data size: 175 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -299,17 +288,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 325 Data size: 3458 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 325 Data size: 3458 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 325 Data size: 3458 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/join35.q.out ql/src/test/results/clientpositive/spark/join35.q.out index 8794053873..cce7a77da1 100644 --- ql/src/test/results/clientpositive/spark/join35.q.out +++ ql/src/test/results/clientpositive/spark/join35.q.out @@ -32,19 +32,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1_n24 -OPTIMIZED SQL: SELECT `t5`.`key`, `t5`.`value`, `t3`.`$f1` AS `cnt` -FROM (SELECT `key`, COUNT(*) AS `$f1` -FROM `default`.`src` -WHERE `key` < 20 -GROUP BY `key` -UNION ALL -SELECT `key`, COUNT(*) AS `$f1` -FROM `default`.`src` -WHERE `key` > 100 -GROUP BY `key`) AS `t3` -INNER JOIN (SELECT `key`, `value` -FROM `default`.`src1` -WHERE (`key` < 20 OR `key` > 100) AND `key` IS NOT NULL) AS `t5` ON `t3`.`key` = `t5`.`key` STAGE DEPENDENCIES: Stage-1 is 
a root stage Stage-0 depends on stages: Stage-1 @@ -63,25 +50,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (UDFToDouble(key) < 20.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) < 20.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) < 20.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -143,25 +130,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x1 - filterExpr: (UDFToDouble(key) > 100.0D) (type: boolean) + filterExpr: ((UDFToDouble(key) > 100.0D) and UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (UDFToDouble(key) > 100.0D) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(key) > 100.0D) and 
UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D) (type: boolean) + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false @@ -223,23 +210,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x - filterExpr: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) + filterExpr: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) < 20.0D) or (UDFToDouble(key) > 100.0D)) and key is not null) (type: boolean) - Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE + predicate: (UDFToDouble(key) NOT BETWEEN 20.0D AND 100.0D and key is not null) (type: boolean) + Statistics: Num rows: 23 Data size: 175 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 175 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: 
_col0 (type: string) - Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 175 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string) auto parallelism: false @@ -306,13 +293,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 74 Data size: 786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1762 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: bigint) auto parallelism: false @@ -326,17 +313,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 162 Data size: 1729 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col3 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 162 Data size: 1729 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 162 Data size: 1729 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -373,13 +360,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 74 Data size: 786 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1762 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 148 Data size: 1572 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: bigint) auto parallelism: false diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index 270f55ad9b..2c5b15aa3a 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -3102,10 +3102,6 @@ POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 POSTHOOK: Input: default@pcr_t1@ds=2000-04-11 #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key`, `value`, `ds` -FROM `default`.`pcr_t1` -WHERE `ds` > '2000-04-08' AND `ds` < '2000-04-11' OR `ds` >= '2000-04-08' AND `ds` <= '2000-04-11' AND `key` = 2 -ORDER BY `key`, `value`, `ds` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git ql/src/test/results/clientpositive/spark/subquery_scalar.q.out ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index d7d652f34c..82b080e8d5 100644 --- ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -805,8 +805,7 @@ POSTHOOK: Input: default@part_null_n0 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 
Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size between (select min(p_size) from part) and (select avg(p_size) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -823,10 +822,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -835,15 +833,15 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized - Map 4 + Map 3 Map Operator Tree: TableScan alias: part @@ -862,7 +860,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -885,41 +883,29 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 26 Data size: 3277 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 26 Data size: 3277 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 3719 Basic stats: COMPLETE Column stats: NONE + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num 
rows: 26 Data size: 3693 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(_col5) BETWEEN UDFToDouble(_col9) AND _col10 (type: boolean) - Statistics: Num rows: 2 Data size: 286 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col10 <= _col9) and (_col9 <= _col11)) (type: boolean) + Statistics: Num rows: 2 Data size: 284 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 284 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 284 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -927,11 +913,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: UDFToDouble(_col0) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reducer 7 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reducer 6 Execution mode: vectorized Reduce Operator 
Tree: Group By Operator @@ -954,8 +944,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[16][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[19][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size between (select min(p_size) from part) and (select avg(p_size) from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -1749,7 +1738,7 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size BETWEEN (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND (select max(p_size) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -1778,17 +1767,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part + filterExpr: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key 
expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized Map 4 Map Operator Tree: @@ -1835,36 +1828,39 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col4 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 
(type: int), _col10 (type: boolean) + Filter Operator + predicate: (_col9 <= _col5) (type: boolean) + Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 28 Data size: 3601 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11 + Statistics: Num rows: 9 Data size: 1157 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN (_col10 is null) THEN (_col5 BETWEEN null AND _col12) ELSE (_col5 BETWEEN _col9 AND _col12) END (type: boolean) - Statistics: Num rows: 14 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + predicate: (_col5 <= _col11) (type: boolean) + Statistics: Num rows: 3 Data size: 385 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 385 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 14 Data size: 1800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 385 Basic 
stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1879,15 +1875,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col2 (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: boolean) + value expressions: _col0 (type: int) Reducer 7 Execution mode: vectorized Reduce Operator Tree: @@ -1907,7 +1903,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size BETWEEN (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND (select max(p_size) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/udf_between.q.out ql/src/test/results/clientpositive/udf_between.q.out index 9ba685adcb..1a4eb076ef 100644 --- ql/src/test/results/clientpositive/udf_between.q.out +++ ql/src/test/results/clientpositive/udf_between.q.out @@ -88,10 +88,10 @@ STAGE PLANS: Processor Tree: TableScan alias: src - filterExpr: (not (UDFToDouble(key) + 100.0D) BETWEEN 
100.0D AND 200.0D) (type: boolean) + filterExpr: (UDFToDouble(key) + 100.0D) NOT BETWEEN 100.0D AND 200.0D (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (not (UDFToDouble(key) + 100.0D) BETWEEN 100.0D AND 200.0D) (type: boolean) + predicate: (UDFToDouble(key) + 100.0D) NOT BETWEEN 100.0D AND 200.0D (type: boolean) Statistics: Num rows: 445 Data size: 79210 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -204,3 +204,177 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 238 val_238 +PREHOOK: query: create table t(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.i SCRIPT [] +PREHOOK: query: SELECT * FROM t where i between 8 and 9 + or i between 9 and 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t where i between 8 and 9 + or i between 9 and 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +8 +9 +10 +PREHOOK: query: explain +SELECT * FROM t where i between 8 and 9 + or i between 9 and 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM t where i between 8 and 9 + or i between 9 and 10 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t + filterExpr: (i BETWEEN 8 AND 9 or i BETWEEN 9 AND 10) (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i BETWEEN 8 AND 9 or i BETWEEN 9 AND 10) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: SELECT * FROM t where i between 8 and 9 + or i between 9 and 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t where i between 8 and 9 + or i between 9 and 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +8 +9 +10 +PREHOOK: query: explain +SELECT * FROM t where i between 6 and 7 + or i between 9 and 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM t where i between 6 and 7 + or i between 9 and 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t + filterExpr: (i BETWEEN 6 AND 7 or i BETWEEN 9 AND 10) (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i BETWEEN 6 AND 7 or i BETWEEN 9 AND 10) (type: boolean) + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data 
size: 16 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: SELECT * FROM t where i between 6 and 7 + or i between 9 and 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t where i between 6 and 7 + or i between 9 and 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +6 +7 +9 +10 +PREHOOK: query: explain +SELECT * FROM t where i not between 6 and 7 + and i not between 9 and 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM t where i not between 6 and 7 + and i not between 9 and 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: t + filterExpr: (i NOT BETWEEN 6 AND 7 and i NOT BETWEEN 9 AND 10) (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i NOT BETWEEN 6 AND 7 and i NOT BETWEEN 9 AND 10) (type: boolean) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: SELECT * FROM t where i not between 6 and 7 + and i not between 9 and 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM t where i not between 6 and 7 + and i not between 9 and 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +8 +11 diff --git ql/src/test/results/clientpositive/vector_between_columns.q.out ql/src/test/results/clientpositive/vector_between_columns.q.out index 
433650503e..28bc1cd671 100644 --- ql/src/test/results/clientpositive/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/vector_between_columns.q.out @@ -157,13 +157,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 36 Data size: 708 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col4 AND _col4) THEN ('Ok') ELSE ('NoOk') END (type: string) + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 (type: smallint), CASE WHEN (_col4 BETWEEN _col1 AND _col1) THEN ('Ok') ELSE ('NoOk') END (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 1, 3, 6] - selectExpressions: IfExprStringScalarStringScalar(col 5:boolean, val Ok, val NoOk)(children: VectorUDFAdaptor(_col1 BETWEEN _col4 AND _col4) -> 5:boolean) -> 6:string + selectExpressions: IfExprStringScalarStringScalar(col 5:boolean, val Ok, val NoOk)(children: VectorUDFAdaptor(_col4 BETWEEN _col1 AND _col1) -> 5:boolean) -> 6:string Statistics: Num rows: 36 Data size: 7192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -320,8 +320,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsTrue(col 5:boolean)(children: VectorUDFAdaptor(_col1 BETWEEN _col4 AND _col4) -> 5:boolean) - predicate: _col1 BETWEEN _col4 AND _col4 (type: boolean) + predicateExpression: SelectColumnIsTrue(col 5:boolean)(children: VectorUDFAdaptor(_col4 BETWEEN _col1 AND _col1) -> 5:boolean) + predicate: _col4 BETWEEN _col1 AND _col1 (type: boolean) Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), _col3 
(type: smallint)