diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 9cd6812ce4..683d0fd01f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -687,6 +687,77 @@ public boolean accept(ExprNodeDescEqualityWrapper e) { } } + private ExprNodeDesc rewriteBetweenToIn(final ExprNodeDesc comparisonExpression, final ExprNodeDesc leftExpression, + final ExprNodeDesc rightExpression, boolean invert) { + // difference in BETWEEN values could be millions, since for each value a new ExprNodeConstantDesc is created + // we should limit the rewrite to avoid taking too much memory + final int REWRITE_THRESHOLD = 100; + + boolean shouldRewrite = false; + long startVal = 0, endVal = 0; + + if (ExprNodeDescUtils.isIntegerType(comparisonExpression) + && leftExpression instanceof ExprNodeConstantDesc + && rightExpression instanceof ExprNodeConstantDesc) { + Object leftValue = ((ExprNodeConstantDesc) leftExpression).getValue(); + Object rightValue = ((ExprNodeConstantDesc) rightExpression).getValue(); + + if (leftValue instanceof Integer) { + startVal = ((Integer) leftValue).longValue(); + endVal = ((Integer) rightValue).longValue(); + } else if (leftValue instanceof Long) { + startVal = ((Long) leftValue).longValue(); + endVal = ((Long) rightValue).longValue(); + } else { + startVal = ((Short) leftValue).longValue(); + endVal = ((Short) rightValue).longValue(); + } + + // BETWEEN could be (10,0) + if(startVal > endVal) { + Long tmpVal = startVal; + startVal = endVal; + endVal = tmpVal; + } + + if ((endVal - startVal) <= REWRITE_THRESHOLD) { + shouldRewrite = true; + } + } + + if (shouldRewrite) { + + List constantExprs = new ArrayList<>(); + constantExprs.add(comparisonExpression); + //generate list of contiguous integers + for (long i = startVal; i <= endVal; i++) { + ExprNodeConstantDesc constExpr = new ExprNodeConstantDesc(comparisonExpression.getTypeInfo(), i); + constantExprs.add(constExpr); + } + ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFIn(), constantExprs); + return newExpression; + } else { + // We transform the BETWEEN clause to AND clause (with NOT on top in invert is true). + // This is more straightforward, as the evaluateExpression method will deal with + // generating the final row count relying on the basic comparator evaluation methods + final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPEqualOrGreaterThan(), + Lists.newArrayList(comparisonExpression, leftExpression)); + final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPEqualOrLessThan(), + Lists.newArrayList(comparisonExpression, rightExpression)); + ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPAnd(), + Lists.newArrayList(leftComparator, rightComparator)); + if (invert) { + newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + new GenericUDFOPNot(), Lists.newArrayList(newExpression)); + } + return newExpression; + } + } + private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, List neededCols, Operator op) throws SemanticException { final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; @@ -702,19 +773,7 @@ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currN return currNumRows; } - // We transform the BETWEEN clause to AND clause (with NOT on top in invert is true). - // This is more straightforward, as the evaluateExpression method will deal with - // generating the final row count relying on the basic comparator evaluation methods - final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPEqualOrGreaterThan(), Lists.newArrayList(comparisonExpression, leftExpression)); - final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPEqualOrLessThan(), Lists.newArrayList(comparisonExpression, rightExpression)); - ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPAnd(), Lists.newArrayList(leftComparator, rightComparator)); - if (invert) { - newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, - new GenericUDFOPNot(), Lists.newArrayList(newExpression)); - } + ExprNodeDesc newExpression = rewriteBetweenToIn(comparisonExpression, leftExpression, rightExpression, invert); return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, currNumRows); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index 5275561619..ed6d8de730 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -938,7 +938,7 @@ public static boolean isSame(List first, List second return true; } - // Given an expression this method figures out of the type for the expression belongs to string group + // Given an expression this method figures out if the type for the expression belongs to string group // e.g. (String, Char, Varchar etc) public static boolean isStringType(ExprNodeDesc expr) { TypeInfo typeInfo = expr.getTypeInfo(); @@ -951,4 +951,18 @@ public static boolean isStringType(ExprNodeDesc expr) { } return false; } + // Given an expression this method figures out if the type for the expression is integer + // i.e. INT, SHORT or LONG + public static boolean isIntegerType(ExprNodeDesc expr) { + TypeInfo typeInfo = expr.getTypeInfo(); + if (typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE) { + PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + if(primitiveCategory == PrimitiveCategory.INT + || primitiveCategory == PrimitiveCategory.SHORT + || primitiveCategory == PrimitiveCategory.LONG){ + return true; + } + } + return false; + } } diff --git a/ql/src/test/results/clientpositive/join46.q.out b/ql/src/test/results/clientpositive/join46.q.out index c192194329..dbaddcc68b 100644 --- a/ql/src/test/results/clientpositive/join46.q.out +++ b/ql/src/test/results/clientpositive/join46.q.out @@ -170,16 +170,16 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator @@ -269,14 +269,14 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator @@ -289,10 +289,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index af7b0ba2ba..6b263272da 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -789,15 +789,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -808,13 +808,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -868,15 +868,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -887,13 +887,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1013,15 +1013,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1032,14 +1032,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1048,13 +1048,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1113,15 +1113,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1132,14 +1132,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1148,13 +1148,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index 84c5b2ff5e..248be63ca4 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -727,15 +727,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -746,13 +746,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -806,15 +806,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10.0D) and (not (s like '%car%')) and (s like '%son') and (t > 0Y) and UDFToInteger(si) BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -825,13 +825,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 565 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -996,15 +996,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1015,14 +1015,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1031,13 +1031,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1096,15 +1096,15 @@ STAGE PLANS: Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d < 12.0D) and (d >= 10) and (not (s like '%car%')) and (s like '%son') and (t <> 101Y) and (t > 0Y) and (t > 10Y) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1115,14 +1115,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1131,13 +1131,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 339 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 83a94f8856..e035219e79 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -3790,9 +3790,9 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_size BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/mapjoin46.q.out b/ql/src/test/results/clientpositive/mapjoin46.q.out index a4f067f101..febb6c7940 100644 --- a/ql/src/test/results/clientpositive/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/mapjoin46.q.out @@ -166,11 +166,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator filter predicates: 0 {_col0 BETWEEN 100 AND 102} @@ -273,11 +273,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator filter predicates: 0 {_col0 BETWEEN 100 AND 102} @@ -306,10 +306,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out index b7faa9a130..e85519d784 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out @@ -115,96 +115,21 @@ select limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 13 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 16 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (GROUP, 169) - Reducer 15 <- Map 14 (GROUP, 336) + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) + Reducer 14 <- Reducer 13 (GROUP, 169) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306) + Reducer 18 <- Reducer 17 (GROUP, 336) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855) Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) Reducer 5 <- Reducer 4 (GROUP, 1009) Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### @@ -230,6 +155,25 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: int) Execution mode: vectorized Map 11 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 12 Map Operator Tree: TableScan alias: web_sales @@ -242,30 +186,33 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 13 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 15 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 14 + Map 16 Map Operator Tree: TableScan alias: catalog_sales @@ -278,29 +225,32 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_ship_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 16 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 19 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 7 Map Operator Tree: TableScan @@ -353,34 +303,58 @@ STAGE PLANS: expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 10 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 12 + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reducer 14 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -398,7 +372,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 15 + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query22.q.out b/ql/src/test/results/clientpositive/perf/spark/query22.q.out index d0970bba16..2079262d2f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query22.q.out @@ -50,27 +50,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 6 + Map 7 Map Operator Tree: TableScan alias: warehouse @@ -94,9 +74,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 11), Map 7 (PARTITION-LEVEL SORT, 11) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 6), Map 6 (PARTITION-LEVEL SORT, 6) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 11), Reducer 2 (PARTITION-LEVEL SORT, 11) + Reducer 4 <- Reducer 3 (GROUP, 31) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,36 +93,33 @@ STAGE PLANS: expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_warehouse_sk (type: int), inv_quantity_on_hand (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 5 - Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 6 - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 37584000 Data size: 593821104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 7 + Map 6 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: item @@ -162,6 +140,34 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) Execution mode: vectorized Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 41342400 Data size: 653203228 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Map 7 + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 45476640 Data size: 718523566 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -183,7 +189,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 250121525 Data size: 3951879695 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: bigint), _col6 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -202,7 +208,7 @@ STAGE PLANS: sort order: +++++ Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query25.q.out b/ql/src/test/results/clientpositive/perf/spark/query25.q.out index 3d7f402870..d15c8cee67 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query25.q.out @@ -94,9 +94,7 @@ select POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -104,7 +102,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 14 + Map 16 Map Operator Tree: TableScan alias: store @@ -125,65 +123,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: d3 - filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 12 - Map Operator Tree: - TableScan - alias: d2 - filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 374), Reducer 9 (PARTITION-LEVEL SORT, 374) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 806), Reducer 9 (PARTITION-LEVEL SORT, 806) - Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) + Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 806), Reducer 2 (PARTITION-LEVEL SORT, 806) + Reducer 4 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 3 (PARTITION-LEVEL SORT, 486) Reducer 5 <- Reducer 4 (GROUP, 582) Reducer 6 <- Reducer 5 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 374), Map 8 (PARTITION-LEVEL SORT, 374) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 306), Map 8 (PARTITION-LEVEL SORT, 306) #### A masked pattern was here #### Vertices: Map 1 @@ -207,6 +157,25 @@ STAGE PLANS: value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)) Execution mode: vectorized Map 11 + Map Operator Tree: + TableScan + alias: d3 + filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 12 Map Operator Tree: TableScan alias: store_returns @@ -219,26 +188,33 @@ STAGE PLANS: expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_net_loss (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - input vertices: - 1 Map 12 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 13 + Map 14 + Map Operator Tree: + TableScan + alias: d2 + filterExpr: (d_moy BETWEEN 4 AND 10 and (d_year = 2000) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 15 Map Operator Tree: TableScan alias: item @@ -290,25 +266,45 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int), cs_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 10 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int), _col1 (type: int) + 1 _col1 (type: int), _col2 (type: int) + outputColumnNames: _col3, _col8, _col9, _col10, _col11 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) + sort order: +++ + Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)) Reducer 2 Reduce Operator Tree: Join Operator @@ -361,7 +357,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col5, _col12, _col20, _col25, _col26, _col28, _col29 input vertices: - 1 Map 14 + 1 Map 16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col20), sum(_col12) @@ -414,16 +410,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int), _col1 (type: int) - 1 _col1 (type: int), _col2 (type: int) - outputColumnNames: _col3, _col8, _col9, _col10, _col11 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col8 (type: int), _col9 (type: int), _col10 (type: int) - sort order: +++ - Map-reduce partition columns: _col8 (type: int), _col9 (type: int), _col10 (type: int) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: decimal(7,2)), _col11 (type: decimal(7,2)) + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(7,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query28.q.out b/ql/src/test/results/clientpositive/perf/spark/query28.q.out index b437829c70..21d55cb138 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query28.q.out @@ -112,15 +112,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 15) + Reducer 11 <- Map 10 (GROUP, 133) Reducer 12 <- Reducer 11 (GROUP, 1) - Reducer 14 <- Map 13 (GROUP, 15) + Reducer 14 <- Map 13 (GROUP, 133) Reducer 15 <- Reducer 14 (GROUP, 1) - Reducer 17 <- Map 16 (GROUP, 15) + Reducer 17 <- Map 16 (GROUP, 133) Reducer 18 <- Reducer 17 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 15) + Reducer 5 <- Map 4 (GROUP, 133) Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 8 <- Map 7 (GROUP, 15) + Reducer 8 <- Map 7 (GROUP, 133) Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -132,22 +132,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 13 @@ -158,22 +158,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 16 @@ -184,22 +184,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 4 @@ -210,22 +210,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Map 7 @@ -236,22 +236,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Reducer 11 @@ -262,7 +262,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -302,7 +302,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -342,7 +342,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -382,7 +382,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -422,7 +422,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 @@ -458,7 +458,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 15) + Reducer 2 <- Map 1 (GROUP, 133) Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: @@ -470,22 +470,22 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) (type: boolean) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized Reducer 2 @@ -496,7 +496,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index a7347102c1..a3b0610f4a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -92,8 +92,7 @@ select POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -101,7 +100,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 16 Map Operator Tree: TableScan alias: store @@ -122,41 +121,17 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 13 - Map Operator Tree: - TableScan - alias: d2 - filterExpr: (d_moy BETWEEN 4 AND 7 and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 14 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) + Reducer 10 <- Map 15 (PARTITION-LEVEL SORT, 486), Reducer 9 (PARTITION-LEVEL SORT, 486) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 36), Map 14 (PARTITION-LEVEL SORT, 36) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 6 (PARTITION-LEVEL SORT, 306) Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 917), Reducer 2 (PARTITION-LEVEL SORT, 917) Reducer 4 <- Reducer 3 (GROUP, 640) Reducer 5 <- Reducer 4 (SORT, 1) Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) + Reducer 9 <- Reducer 13 (PARTITION-LEVEL SORT, 476), Reducer 8 (PARTITION-LEVEL SORT, 476) #### A masked pattern was here #### Vertices: Map 1 @@ -211,26 +186,33 @@ STAGE PLANS: expressions: sr_returned_date_sk (type: int), sr_item_sk (type: int), sr_customer_sk (type: int), sr_ticket_number (type: int), sr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4 - input vertices: - 1 Map 13 - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 14 + Map Operator Tree: + TableScan + alias: d2 + filterExpr: (d_moy BETWEEN 4 AND 7 and (d_year = 1999) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 15 Map Operator Tree: TableScan alias: item @@ -309,7 +291,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col5, _col10, _col11, _col13, _col18, _col19, _col21, _col22 input vertices: - 1 Map 15 + 1 Map 16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: string), _col19 (type: string), _col5 (type: int), _col10 (type: int), _col11 (type: int), _col13 (type: int), _col21 (type: string), _col22 (type: string) @@ -321,6 +303,22 @@ STAGE PLANS: Map-reduce partition columns: _col14 (type: int), _col13 (type: int) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col8 (type: int), _col16 (type: int), _col21 (type: string), _col22 (type: string) + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int) Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out index 88279a30a7..b00389b381 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -112,7 +112,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 567), Reducer 6 (PARTITION-LEVEL SORT, 567) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 551), Reducer 6 (PARTITION-LEVEL SORT, 551) Reducer 3 <- Reducer 2 (SORT, 1) Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) Reducer 6 <- Reducer 5 (GROUP, 529) @@ -166,16 +166,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 16232 Data size: 18163608 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -269,12 +269,12 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 BETWEEN 15 AND 20 (type: boolean) - Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/perf/spark/query38.q.out b/ql/src/test/results/clientpositive/perf/spark/query38.q.out index 0064177f96..e011869704 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query38.q.out @@ -43,68 +43,47 @@ select count(*) from ( limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int), d_date (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 306), Map 9 (PARTITION-LEVEL SORT, 306) + Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 873), Reducer 10 (PARTITION-LEVEL SORT, 873) + Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 369) + Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) + Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) + Reducer 18 <- Reducer 17 (GROUP PARTITION-LEVEL SORT, 186) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 13 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 2 (PARTITION-LEVEL SORT, 975) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 5 <- Reducer 12 (GROUP, 259), Reducer 18 (GROUP, 259), Reducer 4 (GROUP, 259) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 11 + Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 16 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -112,65 +91,19 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 369) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 706), Map 17 (PARTITION-LEVEL SORT, 706) - Reducer 15 <- Reducer 14 (GROUP PARTITION-LEVEL SORT, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Map 12 (PARTITION-LEVEL SORT, 975) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481) - Reducer 4 <- Reducer 10 (GROUP, 259), Reducer 15 (GROUP, 259), Reducer 3 (GROUP, 259) - Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 873), Map 8 (PARTITION-LEVEL SORT, 873) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 12 + Map 14 Map Operator Tree: TableScan alias: customer @@ -190,7 +123,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 13 + Map 15 Map Operator Tree: TableScan alias: web_sales @@ -203,26 +136,34 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 16 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 19 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int), d_date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 17 + Map 20 Map Operator Tree: TableScan alias: customer @@ -242,7 +183,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 8 + Map 9 Map Operator Tree: TableScan alias: catalog_sales @@ -255,26 +196,50 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 11 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col3, _col6, _col7 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col7 (type: string), _col6 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -304,7 +269,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 14 + Reducer 16 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 17 Reduce Operator Tree: Join Operator condition map: @@ -324,7 +305,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reducer 15 + Reducer 18 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -355,6 +336,22 @@ STAGE PLANS: Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -374,7 +371,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -404,7 +401,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 304916424 Data size: 33091779879 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -432,7 +429,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -450,26 +447,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col6, _col7 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col7 (type: string), _col6 (type: string), _col3 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query51.q.out b/ql/src/test/results/clientpositive/perf/spark/query51.q.out index 21afbe2024..5651f8c324 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -87,17 +87,43 @@ order by item_sk limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 169) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 437) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 303), Reducer 3 (PARTITION-LEVEL SORT, 303) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 241) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 154), Map 8 (PARTITION-LEVEL SORT, 154) #### A masked pattern was here #### Vertices: - Map 6 + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_sales_price (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -105,24 +131,19 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 9 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -130,68 +151,19 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 437) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 303), Reducer 8 (PARTITION-LEVEL SORT, 303) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 241) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 169) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_sales_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - input vertices: - 1 Map 6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col1 (type: int), _col4 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 7 + Map 8 Map Operator Tree: TableScan alias: web_sales @@ -204,32 +176,74 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col4 - input vertices: - 1 Map 9 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2) - keys: _col1 (type: int), _col4 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: int, _col1: string, _col2: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 ASC NULLS FIRST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: sum_window_0 + arguments: _col2 + name: sum + window function: GenericUDAFSumHiveDecimal + window frame: ROWS PRECEDING(MAX)~CURRENT + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), sum_window_0 (type: decimal(27,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(27,2)) Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col1 (type: int), _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -267,7 +281,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(27,2)) - Reducer 3 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -283,7 +297,7 @@ STAGE PLANS: Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: int) Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: decimal(27,2)), _col3 (type: int), _col4 (type: string), _col5 (type: decimal(27,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -329,7 +343,7 @@ STAGE PLANS: Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -346,44 +360,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 + Reducer 9 Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col0: int, _col1: string, _col2: decimal(17,2) - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col1 ASC NULLS FIRST - partition by: _col0 - raw input shape: - window functions: - window function definition - alias: sum_window_0 - arguments: _col2 - name: sum - window function: GenericUDAFSumHiveDecimal - window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), sum_window_0 (type: decimal(27,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(27,2)) + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col1 (type: int), _col4 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query59.q.out b/ql/src/test/results/clientpositive/perf/spark/query59.q.out index 1224ab68a6..5164bfc1b4 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query59.q.out @@ -222,16 +222,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 7 Map Operator Tree: @@ -261,16 +261,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 11 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query65.q.out b/ql/src/test/results/clientpositive/perf/spark/query65.q.out index 4afa1a6666..982a1e19cd 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query65.q.out @@ -55,70 +55,20 @@ select limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 9 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 437) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 328), Reducer 2 (PARTITION-LEVEL SORT, 328), Reducer 8 (PARTITION-LEVEL SORT, 328) - Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 8 <- Map 7 (GROUP PARTITION-LEVEL SORT, 437) + Reducer 10 <- Reducer 9 (GROUP PARTITION-LEVEL SORT, 437) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 2 (GROUP, 437) + Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 328), Reducer 10 (PARTITION-LEVEL SORT, 328), Reducer 3 (PARTITION-LEVEL SORT, 328) + Reducer 5 <- Map 13 (PARTITION-LEVEL SORT, 166), Reducer 4 (PARTITION-LEVEL SORT, 166) + Reducer 6 <- Reducer 5 (SORT, 1) + Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 8 (PARTITION-LEVEL SORT, 398) #### A masked pattern was here #### Vertices: Map 1 @@ -134,32 +84,33 @@ STAGE PLANS: expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 6 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col2 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Execution mode: vectorized + Map 11 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 10 + Map 12 Map Operator Tree: TableScan alias: store @@ -179,7 +130,7 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Map 11 + Map 13 Map Operator Tree: TableScan alias: item @@ -200,6 +151,25 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: string) Execution mode: vectorized Map 7 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: store_sales @@ -212,32 +182,65 @@ STAGE PLANS: expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 9 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col2 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(17,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work + Reducer 10 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: decimal(17,2)) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), count(_col2) + keys: _col1 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(38,13)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,13)) Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -252,7 +255,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(17,2)) - Reducer 3 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -273,7 +276,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 232318249 Data size: 20495183367 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col6 (type: string) - Reducer 4 + Reducer 5 Reduce Operator Tree: Join Operator condition map: @@ -293,7 +296,7 @@ STAGE PLANS: Statistics: Num rows: 255550079 Data size: 22544702192 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: string) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -310,35 +313,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 8 - Execution mode: vectorized + Reducer 9 Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: decimal(17,2)) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col2), count(_col2) - keys: _col1 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(38,13)) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(38,13)) + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col3) + keys: _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(17,2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query67.q.out b/ql/src/test/results/clientpositive/perf/spark/query67.q.out index db91e67fdc..597a468601 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query67.q.out @@ -153,16 +153,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int), d_qoy (type: int) outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 9 diff --git a/ql/src/test/results/clientpositive/perf/spark/query68.q.out b/ql/src/test/results/clientpositive/perf/spark/query68.q.out index f2e676381f..20436e72f3 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query68.q.out @@ -170,16 +170,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 13 Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query69.q.out b/ql/src/test/results/clientpositive/perf/spark/query69.q.out index e17832c8e8..0498aa3880 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query69.q.out @@ -91,97 +91,22 @@ select limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 11 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 14 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 17 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 13 <- Map 12 (GROUP, 169) - Reducer 16 <- Map 15 (GROUP, 336) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 398), Map 12 (PARTITION-LEVEL SORT, 398) + Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 154), Map 16 (PARTITION-LEVEL SORT, 154) + Reducer 15 <- Reducer 14 (GROUP, 169) + Reducer 18 <- Map 17 (PARTITION-LEVEL SORT, 306), Map 20 (PARTITION-LEVEL SORT, 306) + Reducer 19 <- Reducer 18 (GROUP, 336) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855) Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 16 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648) + Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Reducer 19 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648) Reducer 6 <- Reducer 5 (GROUP, 265) Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### @@ -219,34 +144,33 @@ STAGE PLANS: expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 11 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 12 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 13 Map Operator Tree: TableScan alias: web_sales @@ -259,30 +183,33 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 14 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 15 + Map 16 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 17 Map Operator Tree: TableScan alias: catalog_sales @@ -295,29 +222,32 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_ship_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 17 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized + Map 20 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 8 Map Operator Tree: TableScan @@ -357,7 +287,51 @@ STAGE PLANS: Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string) Execution mode: vectorized - Reducer 13 + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reducer 14 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -375,7 +349,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 16 + Reducer 18 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reducer 19 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query70.q.out b/ql/src/test/results/clientpositive/perf/spark/query70.q.out index 4222b5262d..1603d485be 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query70.q.out @@ -170,16 +170,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 7 Map Operator Tree: @@ -189,16 +189,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map 9 Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/spark/query73.q.out b/ql/src/test/results/clientpositive/perf/spark/query73.q.out index ddaca3d4a1..81aea57df4 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query73.q.out @@ -106,7 +106,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 567), Reducer 6 (PARTITION-LEVEL SORT, 567) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 551), Reducer 6 (PARTITION-LEVEL SORT, 551) Reducer 3 <- Reducer 2 (SORT, 1) Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) Reducer 6 <- Reducer 5 (GROUP, 529) @@ -160,16 +160,16 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((d_year) IN (2000, 2001, 2002) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -263,12 +263,12 @@ STAGE PLANS: Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 BETWEEN 1 AND 5 (type: boolean) - Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 42591679 Data size: 3757450287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 19166256 Data size: 1690852669 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: bigint) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/perf/spark/query79.q.out b/ql/src/test/results/clientpositive/perf/spark/query79.q.out index a83090f27c..1cab6f8c13 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query79.q.out @@ -60,11 +60,11 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 189 Data size: 361171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_city (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 189 Data size: 361171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col4 (type: int) diff --git a/ql/src/test/results/clientpositive/perf/spark/query86.q.out b/ql/src/test/results/clientpositive/perf/spark/query86.q.out index 1d1e4ef3d3..8dc28275c9 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query86.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query86.q.out @@ -49,43 +49,18 @@ select limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: d1 - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 174), Map 7 (PARTITION-LEVEL SORT, 174) - Reducer 3 <- Reducer 2 (GROUP, 556) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 278) - Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 174), Reducer 2 (PARTITION-LEVEL SORT, 174) + Reducer 4 <- Reducer 3 (GROUP, 556) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 278) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -101,26 +76,33 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_net_paid (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 6 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: decimal(7,2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work Map 7 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: item @@ -141,6 +123,22 @@ STAGE PLANS: value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: decimal(7,2)) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -166,7 +164,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 522729705 Data size: 71076122589 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(17,2)) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -185,7 +183,7 @@ STAGE PLANS: Map-reduce partition columns: (grouping(_col3, 1) + grouping(_col3, 0)) (type: bigint), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END (type: string) Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -223,7 +221,7 @@ STAGE PLANS: Statistics: Num rows: 261364852 Data size: 35538061226 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: decimal(17,2)), _col1 (type: string), _col2 (type: string) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query87.q.out b/ql/src/test/results/clientpositive/perf/spark/query87.q.out index 8ac6dcedbe..5948e56492 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query87.q.out @@ -41,68 +41,68 @@ from ((select distinct c_last_name, c_first_name, d_date ) cool_cust POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 14 (PARTITION-LEVEL SORT, 306) + Reducer 12 <- Map 15 (PARTITION-LEVEL SORT, 873), Reducer 11 (PARTITION-LEVEL SORT, 873) + Reducer 13 <- Reducer 12 (GROUP PARTITION-LEVEL SORT, 369) + Reducer 17 <- Map 14 (PARTITION-LEVEL SORT, 154), Map 16 (PARTITION-LEVEL SORT, 154) + Reducer 18 <- Map 15 (PARTITION-LEVEL SORT, 706), Reducer 17 (PARTITION-LEVEL SORT, 706) + Reducer 19 <- Reducer 18 (GROUP PARTITION-LEVEL SORT, 186) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 14 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 15 (PARTITION-LEVEL SORT, 975), Reducer 2 (PARTITION-LEVEL SORT, 975) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 481) + Reducer 5 <- Reducer 13 (GROUP PARTITION-LEVEL SORT, 213), Reducer 4 (GROUP PARTITION-LEVEL SORT, 213) + Reducer 6 <- Reducer 19 (GROUP, 56), Reducer 5 (GROUP, 56) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 7 + Map 1 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 12 + Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: catalog_sales + filterExpr: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int), d_date (type: string) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 17 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -110,66 +110,19 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 873), Map 9 (PARTITION-LEVEL SORT, 873) - Reducer 11 <- Reducer 10 (GROUP PARTITION-LEVEL SORT, 369) - Reducer 15 <- Map 13 (PARTITION-LEVEL SORT, 706), Map 14 (PARTITION-LEVEL SORT, 706) - Reducer 16 <- Reducer 15 (GROUP PARTITION-LEVEL SORT, 186) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 975), Map 13 (PARTITION-LEVEL SORT, 975) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 481) - Reducer 4 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 213), Reducer 3 (GROUP PARTITION-LEVEL SORT, 213) - Reducer 5 <- Reducer 16 (GROUP, 56), Reducer 4 (GROUP, 56) - Reducer 6 <- Reducer 5 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: (ss_sold_date_sk is not null and ss_customer_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 7 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 13 + Map 15 Map Operator Tree: TableScan alias: customer @@ -189,7 +142,7 @@ STAGE PLANS: Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 14 + Map 16 Map Operator Tree: TableScan alias: web_sales @@ -202,58 +155,30 @@ STAGE PLANS: expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 17 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 9 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: (cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - input vertices: - 1 Map 12 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reducer 10 + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 12 Reduce Operator Tree: Join Operator condition map: @@ -273,7 +198,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -311,7 +236,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 15 + Reducer 17 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 18 Reduce Operator Tree: Join Operator condition map: @@ -331,7 +272,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Reducer 19 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -370,6 +311,22 @@ STAGE PLANS: Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -389,7 +346,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -427,7 +384,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 261355616 Data size: 27168769766 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -469,7 +426,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 54450625 Data size: 7055042151 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -496,7 +453,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query9.q.out b/ql/src/test/results/clientpositive/perf/spark/query9.q.out index a43450147e..06ac9618ee 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query9.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query9.q.out @@ -145,11 +145,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -168,11 +168,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -191,9 +191,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -212,11 +212,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -235,11 +235,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 41 AND 60 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -258,9 +258,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -279,9 +279,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -300,11 +300,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -323,11 +323,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 61 AND 80 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -346,9 +346,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -367,11 +367,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -390,11 +390,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 81 AND 100 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -413,11 +413,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_ext_list_price (type: decimal(7,2)) outputColumnNames: ss_ext_list_price - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash @@ -436,11 +436,11 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_net_paid_inc_tax (type: decimal(7,2)) outputColumnNames: ss_net_paid_inc_tax - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash @@ -459,9 +459,9 @@ STAGE PLANS: Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ss_quantity BETWEEN 21 AND 40 (type: boolean) - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/perf/spark/query90.q.out b/ql/src/test/results/clientpositive/perf/spark/query90.q.out index 9f78d64bfb..b0cf9666ea 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query90.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query90.q.out @@ -43,9 +43,11 @@ select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3, Stage-4 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-4 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -53,27 +55,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: time_dim - filterExpr: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t_time_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 11 + Map 13 Map Operator Tree: TableScan alias: household_demographics @@ -93,34 +75,34 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 9 - Map Operator Tree: - TableScan - alias: web_page - filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work Stage: Stage-2 Spark Edges: - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 10 <- Reducer 9 (GROUP, 1) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 169), Map 8 (PARTITION-LEVEL SORT, 169) #### A masked pattern was here #### Vertices: - Map 7 + Map 12 + Map Operator Tree: + TableScan + alias: time_dim + filterExpr: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t_hour BETWEEN 14 AND 15 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 8 Map Operator Tree: TableScan alias: web_sales @@ -141,40 +123,18 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 9 + 1 Map 11 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 10 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 11 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - Reducer 8 + Reducer 10 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -188,67 +148,82 @@ STAGE PLANS: keys: 0 1 + Reducer 9 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 13 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) - Stage: Stage-4 + Stage: Stage-5 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 7 Map Operator Tree: TableScan - alias: web_page - filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) + expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: Map 5 Map Operator Tree: TableScan - alias: time_dim - filterExpr: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: t_time_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 9600 Data size: 4521600 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 6 - Map Operator Tree: - TableScan - alias: household_demographics - filterExpr: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_dep_count = 8) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) + expressions: wp_web_page_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -257,8 +232,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 169), Map 6 (PARTITION-LEVEL SORT, 169) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -282,40 +258,67 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 6 - Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: time_dim + filterExpr: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (t_hour BETWEEN 6 AND 7 and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t_time_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Reducer 2 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 174243235 Data size: 23692040863 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 7 + Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized Local Work: Map Reduce Local Work @@ -333,7 +336,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 8 + 1 Reducer 10 Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (CAST( _col0 AS decimal(15,4)) / CAST( _col1 AS decimal(15,4))) (type: decimal(35,20)) @@ -344,7 +347,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -362,6 +365,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 11 + Map Operator Tree: + TableScan + alias: web_page + filterExpr: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (wp_char_count BETWEEN 5000 AND 5200 and wp_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 511 Data size: 299380 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + Execution mode: vectorized + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: 100 diff --git a/ql/src/test/results/clientpositive/perf/spark/query97.q.out b/ql/src/test/results/clientpositive/perf/spark/query97.q.out index c4f4804452..7e7d791377 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query97.q.out @@ -47,17 +47,42 @@ from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 6 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Reducer 2 (GROUP, 437) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 386), Reducer 9 (PARTITION-LEVEL SORT, 386) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 306), Map 7 (PARTITION-LEVEL SORT, 306) + Reducer 9 <- Reducer 8 (GROUP, 336) #### A masked pattern was here #### Vertices: - Map 5 + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ss_sold_date_sk is not null (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: vectorized + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -65,24 +90,18 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 8 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -90,65 +109,18 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 437) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 386), Reducer 7 (PARTITION-LEVEL SORT, 386) - Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 336) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: store_sales - filterExpr: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ss_sold_date_sk is not null (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 5 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 6 + Map 7 Map Operator Tree: TableScan alias: catalog_sales @@ -161,30 +133,34 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 8 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int), _col2 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized - Local Work: - Map Reduce Local Work Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -197,7 +173,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -221,7 +197,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Reducer 4 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -239,7 +215,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reducer 9 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query99.q.out b/ql/src/test/results/clientpositive/perf/spark/query99.q.out index f541da81a6..c3da1e0ef8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query99.q.out @@ -76,27 +76,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 5 + Map 6 Map Operator Tree: TableScan alias: call_center @@ -116,7 +96,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 6 + Map 7 Map Operator Tree: TableScan alias: warehouse @@ -136,7 +116,7 @@ STAGE PLANS: Execution mode: vectorized Local Work: Map Reduce Local Work - Map 7 + Map 8 Map Operator Tree: TableScan alias: ship_mode @@ -160,8 +140,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 447) - Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 306), Map 5 (PARTITION-LEVEL SORT, 306) + Reducer 3 <- Reducer 2 (GROUP, 447) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -177,67 +158,92 @@ STAGE PLANS: expressions: cs_sold_date_sk (type: int), cs_ship_date_sk (type: int), cs_call_center_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Map 4 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col4, _col8 - input vertices: - 1 Map 5 - Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3, _col8, _col10 - input vertices: - 1 Map 6 - Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col8, _col10, _col12 - input vertices: - 1 Map 7 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) + Execution mode: vectorized + Map 5 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Reducer 2 Local Work: Map Reduce Local Work - Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col4, _col8 + input vertices: + 1 Map 6 + Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3, _col8, _col10 + input vertices: + 1 Map 7 + Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col8, _col10, _col12 + input vertices: + 1 Map 8 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: substr(_col10, 1, 20) (type: string), _col12 (type: string), _col8 (type: string), CASE WHEN (((_col1 - _col0) <= 30)) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 30) and ((_col1 - _col0) <= 60))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 60) and ((_col1 - _col0) <= 90))) THEN (1) ELSE (0) END (type: int), CASE WHEN ((((_col1 - _col0) > 90) and ((_col1 - _col0) <= 120))) THEN (1) ELSE (0) END (type: int), CASE WHEN (((_col1 - _col0) > 120)) THEN (1) ELSE (0) END (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: PARTIAL Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Reducer 3 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -256,7 +262,7 @@ STAGE PLANS: Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: PARTIAL Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: vectorized Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out index 5b55d44d91..f8fa17fd27 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -213,9 +213,9 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_185] PartitionCols:_col0 - Select Operator [SEL_184] (rows=4058 width=1119) + Select Operator [SEL_184] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_183] (rows=4058 width=1119) + Filter Operator [FIL_183] (rows=36524 width=1119) predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -236,7 +236,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_194] Group By Operator [GBY_191] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_186] (rows=4058 width=1119) + Select Operator [SEL_186] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_184] <-Reducer 7 [BROADCAST_EDGE] vectorized @@ -285,7 +285,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_195] Group By Operator [GBY_192] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_188] (rows=4058 width=1119) + Select Operator [SEL_188] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_184] <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized @@ -323,7 +323,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_196] Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_190] (rows=4058 width=1119) + Select Operator [SEL_190] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_184] diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out index b9efa45853..61b7070751 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out @@ -363,9 +363,9 @@ Stage-0 <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1808] PartitionCols:_col0 - Select Operator [SEL_1803] (rows=8116 width=1119) + Select Operator [SEL_1803] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_1802] (rows=8116 width=1119) + Filter Operator [FIL_1802] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) TableScan [TS_13] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -386,7 +386,7 @@ Stage-0 SHUFFLE [RS_1814] Group By Operator [GBY_1812] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1805] (rows=8116 width=1119) + Select Operator [SEL_1805] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1803] <-Reducer 35 [CONTAINS] @@ -402,9 +402,9 @@ Stage-0 <-Map 37 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1836] PartitionCols:_col0 - Select Operator [SEL_1831] (rows=8116 width=1119) + Select Operator [SEL_1831] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_1830] (rows=8116 width=1119) + Filter Operator [FIL_1830] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_year BETWEEN 1998 AND 2000) TableScan [TS_24] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -425,7 +425,7 @@ Stage-0 SHUFFLE [RS_1842] Group By Operator [GBY_1840] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1833] (rows=8116 width=1119) + Select Operator [SEL_1833] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1831] <-Reducer 9 [CONTAINS] @@ -441,9 +441,9 @@ Stage-0 <-Map 102 [SIMPLE_EDGE] vectorized SHUFFLE [RS_1626] PartitionCols:_col0 - Select Operator [SEL_1615] (rows=8116 width=1119) + Select Operator [SEL_1615] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_1614] (rows=8116 width=1119) + Filter Operator [FIL_1614] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_year BETWEEN 1999 AND 2001) TableScan [TS_97] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -464,7 +464,7 @@ Stage-0 SHUFFLE [RS_1638] Group By Operator [GBY_1633] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1623] (rows=8116 width=1119) + Select Operator [SEL_1623] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized @@ -505,7 +505,7 @@ Stage-0 SHUFFLE [RS_1815] Group By Operator [GBY_1813] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1807] (rows=8116 width=1119) + Select Operator [SEL_1807] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1803] <-Reducer 41 [CONTAINS] @@ -539,7 +539,7 @@ Stage-0 SHUFFLE [RS_1843] Group By Operator [GBY_1841] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1835] (rows=8116 width=1119) + Select Operator [SEL_1835] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1831] <-Reducer 45 [CONTAINS] @@ -573,7 +573,7 @@ Stage-0 SHUFFLE [RS_1639] Group By Operator [GBY_1634] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1625] (rows=8116 width=1119) + Select Operator [SEL_1625] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 57 [CUSTOM_SIMPLE_EDGE] vectorized @@ -676,7 +676,7 @@ Stage-0 SHUFFLE [RS_1635] Group By Operator [GBY_1630] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1617] (rows=8116 width=1119) + Select Operator [SEL_1617] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 71 [BROADCAST_EDGE] vectorized @@ -742,7 +742,7 @@ Stage-0 SHUFFLE [RS_1636] Group By Operator [GBY_1631] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1619] (rows=8116 width=1119) + Select Operator [SEL_1619] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 74 [BROADCAST_EDGE] vectorized @@ -804,7 +804,7 @@ Stage-0 SHUFFLE [RS_1637] Group By Operator [GBY_1632] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1621] (rows=8116 width=1119) + Select Operator [SEL_1621] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_1615] <-Reducer 77 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/query22.q.out index cfe780a270..ca5c970ad4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query22.q.out @@ -112,9 +112,9 @@ Stage-0 <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_81] PartitionCols:_col0 - Select Operator [SEL_80] (rows=8116 width=1119) + Select Operator [SEL_80] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_79] (rows=8116 width=1119) + Filter Operator [FIL_79] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out index b68c54aef7..044df14792 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -177,9 +177,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_220] (rows=4058 width=1119) + Select Operator [SEL_220] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_217] (rows=4058 width=1119) + Filter Operator [FIL_217] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -200,9 +200,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_219] (rows=4058 width=1119) + Select Operator [SEL_219] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_216] (rows=4058 width=1119) + Filter Operator [FIL_216] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) Please refer to the previous TableScan [TS_3] <-Map 16 [SIMPLE_EDGE] vectorized @@ -255,7 +255,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_229] Group By Operator [GBY_227] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_224] (rows=4058 width=1119) + Select Operator [SEL_224] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_219] <-Reducer 2 [SIMPLE_EDGE] diff --git a/ql/src/test/results/clientpositive/perf/tez/query28.q.out b/ql/src/test/results/clientpositive/perf/tez/query28.q.out index ca0bf37abf..27546e84f4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query28.q.out @@ -142,16 +142,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_139] Group By Operator [GBY_138] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_137] (rows=21333171 width=88) + Group By Operator [GBY_137] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_116] PartitionCols:_col0 - Group By Operator [GBY_110] (rows=21333171 width=88) + Group By Operator [GBY_110] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_104] (rows=21333171 width=88) + Select Operator [SEL_104] (rows=191998545 width=88) Output:["ss_list_price"] - Filter Operator [FIL_98] (rows=21333171 width=88) + Filter Operator [FIL_98] (rows=191998545 width=88) predicate:((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] @@ -165,16 +165,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_145] Group By Operator [GBY_144] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_143] (rows=21333171 width=88) + Group By Operator [GBY_143] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_117] PartitionCols:_col0 - Group By Operator [GBY_111] (rows=21333171 width=88) + Group By Operator [GBY_111] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_105] (rows=21333171 width=88) + Select Operator [SEL_105] (rows=191998545 width=88) Output:["ss_list_price"] - Filter Operator [FIL_99] (rows=21333171 width=88) + Filter Operator [FIL_99] (rows=191998545 width=88) predicate:((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) Please refer to the previous TableScan [TS_0] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized @@ -187,16 +187,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_151] Group By Operator [GBY_150] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_149] (rows=21333171 width=88) + Group By Operator [GBY_149] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_118] PartitionCols:_col0 - Group By Operator [GBY_112] (rows=21333171 width=88) + Group By Operator [GBY_112] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_106] (rows=21333171 width=88) + Select Operator [SEL_106] (rows=191998545 width=88) Output:["ss_list_price"] - Filter Operator [FIL_100] (rows=21333171 width=88) + Filter Operator [FIL_100] (rows=191998545 width=88) predicate:((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) Please refer to the previous TableScan [TS_0] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized @@ -209,16 +209,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_121] Group By Operator [GBY_120] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_119] (rows=21333171 width=88) + Group By Operator [GBY_119] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_113] PartitionCols:_col0 - Group By Operator [GBY_107] (rows=21333171 width=88) + Group By Operator [GBY_107] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_101] (rows=21333171 width=88) + Select Operator [SEL_101] (rows=191998545 width=88) Output:["ss_list_price"] - Filter Operator [FIL_95] (rows=21333171 width=88) + Filter Operator [FIL_95] (rows=191998545 width=88) predicate:((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) Please refer to the previous TableScan [TS_0] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized @@ -231,16 +231,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_127] Group By Operator [GBY_126] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_125] (rows=21333171 width=88) + Group By Operator [GBY_125] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_114] PartitionCols:_col0 - Group By Operator [GBY_108] (rows=21333171 width=88) + Group By Operator [GBY_108] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_102] (rows=21333171 width=88) + Select Operator [SEL_102] (rows=191998545 width=88) Output:["ss_list_price"] - Filter Operator [FIL_96] (rows=21333171 width=88) + Filter Operator [FIL_96] (rows=191998545 width=88) predicate:((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) Please refer to the previous TableScan [TS_0] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] vectorized @@ -253,16 +253,16 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_133] Group By Operator [GBY_132] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_131] (rows=21333171 width=88) + Group By Operator [GBY_131] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_115] PartitionCols:_col0 - Group By Operator [GBY_109] (rows=21333171 width=88) + Group By Operator [GBY_109] (rows=191998545 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_103] (rows=21333171 width=88) + Select Operator [SEL_103] (rows=191998545 width=88) Output:["ss_list_price"] - Filter Operator [FIL_97] (rows=21333171 width=88) + Filter Operator [FIL_97] (rows=191998545 width=88) predicate:((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) Please refer to the previous TableScan [TS_0] diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out index b05162282c..0f019d54de 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -178,9 +178,9 @@ Stage-0 <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_220] (rows=4058 width=1119) + Select Operator [SEL_220] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_218] (rows=4058 width=1119) + Filter Operator [FIL_218] (rows=36524 width=1119) predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query34.q.out b/ql/src/test/results/clientpositive/perf/tez/query34.q.out index 7d140921d9..b339193cb0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query34.q.out @@ -99,7 +99,7 @@ Stage-0 <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_139] PartitionCols:_col1 - Filter Operator [FIL_138] (rows=42591679 width=88) + Filter Operator [FIL_138] (rows=19166256 width=88) predicate:_col2 BETWEEN 15 AND 20 Select Operator [SEL_137] (rows=383325119 width=88) Output:["_col0","_col1","_col2"] @@ -143,9 +143,9 @@ Stage-0 <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_109] PartitionCols:_col0 - Select Operator [SEL_108] (rows=16232 width=1119) + Select Operator [SEL_108] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_107] (rows=16232 width=1119) + Filter Operator [FIL_107] (rows=73049 width=1119) predicate:((d_dom BETWEEN 1 AND 3 or d_dom BETWEEN 25 AND 28) and (d_year) IN (2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] @@ -166,7 +166,7 @@ Stage-0 SHUFFLE [RS_112] Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=16232 width=1119) + Select Operator [SEL_110] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_108] <-Reducer 13 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query38.q.out b/ql/src/test/results/clientpositive/perf/tez/query38.q.out index 78cb5e15cb..915f228f53 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query38.q.out @@ -122,9 +122,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_203] PartitionCols:_col0 - Select Operator [SEL_200] (rows=8116 width=1119) + Select Operator [SEL_200] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_199] (rows=8116 width=1119) + Filter Operator [FIL_199] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_month_seq"] @@ -145,7 +145,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_211] Group By Operator [GBY_208] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_204] (rows=8116 width=1119) + Select Operator [SEL_204] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_200] <-Reducer 20 [BROADCAST_EDGE] vectorized @@ -207,7 +207,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_212] Group By Operator [GBY_209] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_206] (rows=8116 width=1119) + Select Operator [SEL_206] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_200] <-Reducer 4 [CONTAINS] vectorized @@ -269,7 +269,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_210] Group By Operator [GBY_207] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_202] (rows=8116 width=1119) + Select Operator [SEL_202] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_200] diff --git a/ql/src/test/results/clientpositive/perf/tez/query51.q.out b/ql/src/test/results/clientpositive/perf/tez/query51.q.out index ba3f994b42..ec9f50cb28 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query51.q.out @@ -145,9 +145,9 @@ Stage-0 <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_92] PartitionCols:_col0 - Select Operator [SEL_89] (rows=8116 width=1119) + Select Operator [SEL_89] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_88] (rows=8116 width=1119) + Filter Operator [FIL_88] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_month_seq"] @@ -168,7 +168,7 @@ Stage-0 SHUFFLE [RS_97] Group By Operator [GBY_95] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_93] (rows=8116 width=1119) + Select Operator [SEL_93] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_89] <-Reducer 3 [SIMPLE_EDGE] @@ -208,7 +208,7 @@ Stage-0 SHUFFLE [RS_96] Group By Operator [GBY_94] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_91] (rows=8116 width=1119) + Select Operator [SEL_91] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_89] diff --git a/ql/src/test/results/clientpositive/perf/tez/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/query59.q.out index a4be291b1b..54ddb1f4d7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query59.q.out @@ -144,9 +144,9 @@ Stage-0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_216] PartitionCols:_col1 - Select Operator [SEL_214] (rows=8116 width=1119) + Select Operator [SEL_214] (rows=73049 width=1119) Output:["_col1"] - Filter Operator [FIL_212] (rows=8116 width=1119) + Filter Operator [FIL_212] (rows=73049 width=1119) predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) TableScan [TS_15] (rows=73049 width=1119) default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_week_seq"] @@ -226,9 +226,9 @@ Stage-0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_215] PartitionCols:_col1 - Select Operator [SEL_213] (rows=8116 width=1119) + Select Operator [SEL_213] (rows=73049 width=1119) Output:["_col1"] - Filter Operator [FIL_211] (rows=8116 width=1119) + Filter Operator [FIL_211] (rows=73049 width=1119) predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) Please refer to the previous TableScan [TS_15] <-Reducer 3 [SIMPLE_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query65.q.out b/ql/src/test/results/clientpositive/perf/tez/query65.q.out index c9759c396d..e86179cd04 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query65.q.out @@ -133,9 +133,9 @@ Stage-0 <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_134] PartitionCols:_col0 - Select Operator [SEL_131] (rows=8116 width=1119) + Select Operator [SEL_131] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_130] (rows=8116 width=1119) + Filter Operator [FIL_130] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] @@ -167,7 +167,7 @@ Stage-0 SHUFFLE [RS_139] Group By Operator [GBY_137] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_135] (rows=8116 width=1119) + Select Operator [SEL_135] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_131] <-Reducer 3 [SIMPLE_EDGE] vectorized @@ -217,7 +217,7 @@ Stage-0 SHUFFLE [RS_138] Group By Operator [GBY_136] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=8116 width=1119) + Select Operator [SEL_133] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_131] diff --git a/ql/src/test/results/clientpositive/perf/tez/query67.q.out b/ql/src/test/results/clientpositive/perf/tez/query67.q.out index 960e66861c..9f41800c87 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query67.q.out @@ -165,9 +165,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=8116 width=1119) + Select Operator [SEL_86] (rows=73049 width=1119) Output:["_col0","_col2","_col3","_col4"] - Filter Operator [FIL_85] (rows=8116 width=1119) + Filter Operator [FIL_85] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq","d_year","d_moy","d_qoy"] @@ -210,7 +210,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=8116 width=1119) + Select Operator [SEL_88] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git a/ql/src/test/results/clientpositive/perf/tez/query68.q.out b/ql/src/test/results/clientpositive/perf/tez/query68.q.out index fd1e04b328..b0dc8390a4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query68.q.out @@ -193,9 +193,9 @@ Stage-0 <-Map 14 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_158] PartitionCols:_col0 - Select Operator [SEL_157] (rows=8116 width=1119) + Select Operator [SEL_157] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_156] (rows=8116 width=1119) + Filter Operator [FIL_156] (rows=73049 width=1119) predicate:((d_year) IN (1998, 1999, 2000) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] @@ -216,7 +216,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_161] Group By Operator [GBY_160] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_159] (rows=8116 width=1119) + Select Operator [SEL_159] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_157] <-Reducer 17 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query69.q.out b/ql/src/test/results/clientpositive/perf/tez/query69.q.out index 4aad667ba3..58e9c5071a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -158,9 +158,9 @@ Stage-0 <-Map 13 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_197] PartitionCols:_col0 - Select Operator [SEL_192] (rows=4058 width=1119) + Select Operator [SEL_192] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_191] (rows=4058 width=1119) + Filter Operator [FIL_191] (rows=36524 width=1119) predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] @@ -181,7 +181,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_204] Group By Operator [GBY_201] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_198] (rows=4058 width=1119) + Select Operator [SEL_198] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_192] <-Reducer 4 [ONE_TO_ONE_EDGE] @@ -260,7 +260,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_202] Group By Operator [GBY_199] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_194] (rows=4058 width=1119) + Select Operator [SEL_194] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_192] <-Reducer 8 [BROADCAST_EDGE] vectorized @@ -309,7 +309,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_203] Group By Operator [GBY_200] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_196] (rows=4058 width=1119) + Select Operator [SEL_196] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_192] diff --git a/ql/src/test/results/clientpositive/perf/tez/query70.q.out b/ql/src/test/results/clientpositive/perf/tez/query70.q.out index 4efb85a52c..c80c0d5235 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query70.q.out @@ -166,9 +166,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_139] PartitionCols:_col0 - Select Operator [SEL_136] (rows=8116 width=1119) + Select Operator [SEL_136] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_135] (rows=8116 width=1119) + Filter Operator [FIL_135] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] @@ -189,7 +189,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_144] Group By Operator [GBY_142] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_140] (rows=8116 width=1119) + Select Operator [SEL_140] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_136] <-Reducer 19 [BROADCAST_EDGE] vectorized @@ -254,7 +254,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_143] Group By Operator [GBY_141] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_138] (rows=8116 width=1119) + Select Operator [SEL_138] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_136] diff --git a/ql/src/test/results/clientpositive/perf/tez/query73.q.out b/ql/src/test/results/clientpositive/perf/tez/query73.q.out index b18b8608db..e48514476d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query73.q.out @@ -93,7 +93,7 @@ Stage-0 <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_139] PartitionCols:_col1 - Filter Operator [FIL_138] (rows=42591679 width=88) + Filter Operator [FIL_138] (rows=19166256 width=88) predicate:_col2 BETWEEN 1 AND 5 Select Operator [SEL_137] (rows=383325119 width=88) Output:["_col0","_col1","_col2"] @@ -137,9 +137,9 @@ Stage-0 <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_109] PartitionCols:_col0 - Select Operator [SEL_108] (rows=8116 width=1119) + Select Operator [SEL_108] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_107] (rows=8116 width=1119) + Filter Operator [FIL_107] (rows=73049 width=1119) predicate:((d_year) IN (2000, 2001, 2002) and d_date_sk is not null and d_dom BETWEEN 1 AND 2) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_dom"] @@ -160,7 +160,7 @@ Stage-0 SHUFFLE [RS_112] Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_110] (rows=8116 width=1119) + Select Operator [SEL_110] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_108] <-Reducer 13 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query79.q.out b/ql/src/test/results/clientpositive/perf/tez/query79.q.out index 7b9d48f123..ac8eef632d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query79.q.out @@ -113,9 +113,9 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_116] (rows=189 width=1910) + Select Operator [SEL_116] (rows=1704 width=1910) Output:["_col0","_col2"] - Filter Operator [FIL_115] (rows=189 width=1910) + Filter Operator [FIL_115] (rows=1704 width=1910) predicate:(s_number_employees BETWEEN 200 AND 295 and s_store_sk is not null) TableScan [TS_9] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_number_employees","s_city"] @@ -161,7 +161,7 @@ Stage-0 SHUFFLE [RS_120] Group By Operator [GBY_119] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_118] (rows=189 width=1910) + Select Operator [SEL_118] (rows=1704 width=1910) Output:["_col0"] Please refer to the previous Select Operator [SEL_116] <-Reducer 15 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query86.q.out b/ql/src/test/results/clientpositive/perf/tez/query86.q.out index 8140b72795..b6e7ce01ed 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query86.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query86.q.out @@ -111,9 +111,9 @@ Stage-0 <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_63] PartitionCols:_col0 - Select Operator [SEL_62] (rows=8116 width=1119) + Select Operator [SEL_62] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_61] (rows=8116 width=1119) + Filter Operator [FIL_61] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] @@ -145,7 +145,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_66] Group By Operator [GBY_65] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_64] (rows=8116 width=1119) + Select Operator [SEL_64] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_62] diff --git a/ql/src/test/results/clientpositive/perf/tez/query87.q.out b/ql/src/test/results/clientpositive/perf/tez/query87.q.out index 1b9256d53e..00b31aa698 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query87.q.out @@ -123,9 +123,9 @@ Stage-0 <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_232] PartitionCols:_col0 - Select Operator [SEL_227] (rows=8116 width=1119) + Select Operator [SEL_227] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_226] (rows=8116 width=1119) + Filter Operator [FIL_226] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_month_seq"] @@ -146,7 +146,7 @@ Stage-0 SHUFFLE [RS_239] Group By Operator [GBY_236] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_233] (rows=8116 width=1119) + Select Operator [SEL_233] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_227] <-Reducer 6 [CONTAINS] vectorized @@ -219,7 +219,7 @@ Stage-0 SHUFFLE [RS_238] Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_231] (rows=8116 width=1119) + Select Operator [SEL_231] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_227] <-Reducer 22 [BROADCAST_EDGE] vectorized @@ -285,7 +285,7 @@ Stage-0 SHUFFLE [RS_237] Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_229] (rows=8116 width=1119) + Select Operator [SEL_229] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_227] <-Reducer 21 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query9.q.out b/ql/src/test/results/clientpositive/perf/tez/query9.q.out index 611daa8b88..47aadb1de1 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query9.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query9.q.out @@ -189,9 +189,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_253] Group By Operator [GBY_248] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_243] (rows=63999515 width=88) + Select Operator [SEL_243] (rows=575995635 width=88) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_238] (rows=63999515 width=88) + Filter Operator [FIL_238] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 41 AND 60 TableScan [TS_80] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_net_paid_inc_tax"] @@ -209,9 +209,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_230] Group By Operator [GBY_225] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_220] (rows=63999515 width=88) + Select Operator [SEL_220] (rows=575995635 width=88) Output:["ss_ext_list_price"] - Filter Operator [FIL_215] (rows=63999515 width=88) + Filter Operator [FIL_215] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 41 AND 60 TableScan [TS_73] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_ext_list_price"] @@ -227,8 +227,8 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_208] Group By Operator [GBY_203] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_198] (rows=63999515 width=88) - Filter Operator [FIL_193] (rows=63999515 width=88) + Select Operator [SEL_198] (rows=575995635 width=88) + Filter Operator [FIL_193] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 41 AND 60 TableScan [TS_66] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity"] @@ -246,9 +246,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_252] Group By Operator [GBY_247] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_242] (rows=63999515 width=88) + Select Operator [SEL_242] (rows=575995635 width=88) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_237] (rows=63999515 width=88) + Filter Operator [FIL_237] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 21 AND 40 Please refer to the previous TableScan [TS_80] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] @@ -265,9 +265,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_229] Group By Operator [GBY_224] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_219] (rows=63999515 width=88) + Select Operator [SEL_219] (rows=575995635 width=88) Output:["ss_ext_list_price"] - Filter Operator [FIL_214] (rows=63999515 width=88) + Filter Operator [FIL_214] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 21 AND 40 Please refer to the previous TableScan [TS_73] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] @@ -282,8 +282,8 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_207] Group By Operator [GBY_202] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_197] (rows=63999515 width=88) - Filter Operator [FIL_192] (rows=63999515 width=88) + Select Operator [SEL_197] (rows=575995635 width=88) + Filter Operator [FIL_192] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 21 AND 40 Please refer to the previous TableScan [TS_66] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] @@ -313,8 +313,8 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_206] Group By Operator [GBY_201] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_196] (rows=63999515 width=88) - Filter Operator [FIL_191] (rows=63999515 width=88) + Select Operator [SEL_196] (rows=575995635 width=88) + Filter Operator [FIL_191] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 1 AND 20 Please refer to the previous TableScan [TS_66] <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized @@ -327,9 +327,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_228] Group By Operator [GBY_223] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_218] (rows=63999515 width=88) + Select Operator [SEL_218] (rows=575995635 width=88) Output:["ss_ext_list_price"] - Filter Operator [FIL_213] (rows=63999515 width=88) + Filter Operator [FIL_213] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 1 AND 20 Please refer to the previous TableScan [TS_73] <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized @@ -342,9 +342,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_251] Group By Operator [GBY_246] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_241] (rows=63999515 width=88) + Select Operator [SEL_241] (rows=575995635 width=88) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_236] (rows=63999515 width=88) + Filter Operator [FIL_236] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 1 AND 20 Please refer to the previous TableScan [TS_80] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] vectorized @@ -355,8 +355,8 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_204] Group By Operator [GBY_199] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_194] (rows=63999515 width=88) - Filter Operator [FIL_189] (rows=63999515 width=88) + Select Operator [SEL_194] (rows=575995635 width=88) + Filter Operator [FIL_189] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 61 AND 80 Please refer to the previous TableScan [TS_66] <-Reducer 24 [CUSTOM_SIMPLE_EDGE] vectorized @@ -369,9 +369,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_226] Group By Operator [GBY_221] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_216] (rows=63999515 width=88) + Select Operator [SEL_216] (rows=575995635 width=88) Output:["ss_ext_list_price"] - Filter Operator [FIL_211] (rows=63999515 width=88) + Filter Operator [FIL_211] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 61 AND 80 Please refer to the previous TableScan [TS_73] <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized @@ -384,9 +384,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_249] Group By Operator [GBY_244] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_239] (rows=63999515 width=88) + Select Operator [SEL_239] (rows=575995635 width=88) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_234] (rows=63999515 width=88) + Filter Operator [FIL_234] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 61 AND 80 Please refer to the previous TableScan [TS_80] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized @@ -397,8 +397,8 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_205] Group By Operator [GBY_200] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_195] (rows=63999515 width=88) - Filter Operator [FIL_190] (rows=63999515 width=88) + Select Operator [SEL_195] (rows=575995635 width=88) + Filter Operator [FIL_190] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 81 AND 100 Please refer to the previous TableScan [TS_66] <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized @@ -411,9 +411,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_227] Group By Operator [GBY_222] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] - Select Operator [SEL_217] (rows=63999515 width=88) + Select Operator [SEL_217] (rows=575995635 width=88) Output:["ss_ext_list_price"] - Filter Operator [FIL_212] (rows=63999515 width=88) + Filter Operator [FIL_212] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 81 AND 100 Please refer to the previous TableScan [TS_73] <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized @@ -426,9 +426,9 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_250] Group By Operator [GBY_245] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] - Select Operator [SEL_240] (rows=63999515 width=88) + Select Operator [SEL_240] (rows=575995635 width=88) Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_235] (rows=63999515 width=88) + Filter Operator [FIL_235] (rows=575995635 width=88) predicate:ss_quantity BETWEEN 81 AND 100 Please refer to the previous TableScan [TS_80] diff --git a/ql/src/test/results/clientpositive/perf/tez/query90.q.out b/ql/src/test/results/clientpositive/perf/tez/query90.q.out index ae5c596192..f52669d7af 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query90.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query90.q.out @@ -106,9 +106,9 @@ Stage-0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_171] PartitionCols:_col0 - Select Operator [SEL_168] (rows=9600 width=471) + Select Operator [SEL_168] (rows=86400 width=471) Output:["_col0"] - Filter Operator [FIL_166] (rows=9600 width=471) + Filter Operator [FIL_166] (rows=86400 width=471) predicate:(t_hour BETWEEN 14 AND 15 and t_time_sk is not null) TableScan [TS_6] (rows=86400 width=471) default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour"] @@ -154,7 +154,7 @@ Stage-0 SHUFFLE [RS_176] Group By Operator [GBY_174] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_172] (rows=9600 width=471) + Select Operator [SEL_172] (rows=86400 width=471) Output:["_col0"] Please refer to the previous Select Operator [SEL_168] <-Reducer 20 [BROADCAST_EDGE] vectorized @@ -190,9 +190,9 @@ Stage-0 <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_169] PartitionCols:_col0 - Select Operator [SEL_167] (rows=9600 width=471) + Select Operator [SEL_167] (rows=86400 width=471) Output:["_col0"] - Filter Operator [FIL_165] (rows=9600 width=471) + Filter Operator [FIL_165] (rows=86400 width=471) predicate:(t_hour BETWEEN 6 AND 7 and t_time_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] @@ -221,7 +221,7 @@ Stage-0 SHUFFLE [RS_175] Group By Operator [GBY_173] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_170] (rows=9600 width=471) + Select Operator [SEL_170] (rows=86400 width=471) Output:["_col0"] Please refer to the previous Select Operator [SEL_167] <-Reducer 19 [BROADCAST_EDGE] vectorized diff --git a/ql/src/test/results/clientpositive/perf/tez/query97.q.out b/ql/src/test/results/clientpositive/perf/tez/query97.q.out index b65b57aac1..db42b981d3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query97.q.out @@ -93,9 +93,9 @@ Stage-0 <-Map 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_72] PartitionCols:_col0 - Select Operator [SEL_71] (rows=8116 width=1119) + Select Operator [SEL_71] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_70] (rows=8116 width=1119) + Filter Operator [FIL_70] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] @@ -116,7 +116,7 @@ Stage-0 SHUFFLE [RS_78] Group By Operator [GBY_76] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_73] (rows=8116 width=1119) + Select Operator [SEL_73] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_71] <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized @@ -152,7 +152,7 @@ Stage-0 SHUFFLE [RS_79] Group By Operator [GBY_77] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_75] (rows=8116 width=1119) + Select Operator [SEL_75] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_71] diff --git a/ql/src/test/results/clientpositive/perf/tez/query99.q.out b/ql/src/test/results/clientpositive/perf/tez/query99.q.out index 456fd8c96f..97eb605439 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -153,9 +153,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_103] (rows=8116 width=1119) + Select Operator [SEL_103] (rows=73049 width=1119) Output:["_col0"] - Filter Operator [FIL_102] (rows=8116 width=1119) + Filter Operator [FIL_102] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] @@ -209,7 +209,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_107] Group By Operator [GBY_106] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_105] (rows=8116 width=1119) + Select Operator [SEL_105] (rows=73049 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_103] diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out index 1627af2561..098b53a471 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out @@ -258,11 +258,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator filter predicates: 0 {_col0 BETWEEN 100 AND 102} @@ -291,10 +291,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out index 9cc2de467f..a5f3fb952c 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out @@ -112,11 +112,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key BETWEEN 100 AND 102 and value is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -270,11 +270,11 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 @@ -289,11 +289,11 @@ STAGE PLANS: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -301,13 +301,13 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -406,7 +406,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 16 Data size: 317 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out index a1414090f3..1f60dcd555 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out @@ -932,11 +932,11 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (col1 is not null and col2 BETWEEN 1 AND 3) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -944,15 +944,15 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator Target Columns: [Map 2 -> [part_col:int (part_col)]] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out index dc7a510510..8a14d71c47 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -3762,9 +3762,9 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_size BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -3783,7 +3783,7 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_partkey BETWEEN 1 AND 20 (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(p_partkey), count(p_partkey) mode: hash