diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index f0b41f36f3..0f27336bc9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -592,4 +592,23 @@ private static boolean areBacktrackedExprsCompatible(final List or } return currRS; } + + /** + * Walks up the parent chain from the given reduce-side group-by, one parent at a time, and returns the first GroupByOperator ancestor it meets (intended to be the map-side partial/merge group-by). The walk gives up and returns null as soon as an operator on the path, including the starting one, has more than one parent, or when the chain runs out of parents. No check is made that the returned operator actually sits on the map side. + * @param reduceSideGbOp the group-by to start from, expected by the caller contract to be the reduce-side one; it is dereferenced without a null check and its position in the plan is not verified here + * @return the nearest upstream GroupByOperator reachable through a branch-free parent chain, or null if there is none + */ + public static GroupByOperator findMapSideGb(final GroupByOperator reduceSideGbOp) { + Operator parentOp = reduceSideGbOp; + while(parentOp.getParentOperators() != null && parentOp.getParentOperators().size() > 0) { + if(parentOp.getParentOperators().size() > 1) { + return null; /* more than one parent: give up rather than pick a branch */ + } + parentOp = parentOp.getParentOperators().get(0); + if(parentOp instanceof GroupByOperator ) { + return (GroupByOperator)parentOp; + } + } + return null; /* ran out of parents without meeting a GroupByOperator */ + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 32fba6c8ff..fbb0def60d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1382,7 +1382,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { // Case 3: column stats, hash aggregation, NO grouping sets cardinality = Math.min(parentNumRows/2, StatsUtils.safeMult(ndvProduct, parallelism)); - long orgParentNumRows = getParentNumRows(gop, gop.getConf().getKeys(), conf); + long orgParentNumRows = StatsUtils.safeMult(getParentNumRows(gop, gop.getConf().getKeys(), conf) , + parallelism); cardinality = 
Math.min(cardinality, orgParentNumRows); if (LOG.isDebugEnabled()) { @@ -1410,8 +1411,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // in reduce side GBY, we don't know if the grouping set was present or not. so get it // from map side GBY - GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class); - if (mGop != null) { + GroupByOperator mGop = OperatorUtils.findMapSideGb(gop); + if(mGop != null) { containsGroupingSet = mGop.getConf().isGroupingSetsPresent(); } @@ -1426,6 +1427,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { // Case 9: column stats, NO grouping sets cardinality = Math.min(parentNumRows, ndvProduct); + // to get to the source number of rows we should be using original group by + GroupByOperator gOpStats = mGop; + if(gOpStats == null) { + // it could be NULL in case the plan has single group by (instead of merge and final) e.g. autogather stats + gOpStats = gop; + } + long orgParentNumRows = getParentNumRows(gOpStats, gOpStats.getConf().getKeys(), conf) ; + cardinality = Math.min(orgParentNumRows, cardinality); if (LOG.isDebugEnabled()) { LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " + cardinality); diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index 7bee405977..135c531e7b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -783,7 +783,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: final outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true Select 
Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) @@ -792,13 +792,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 9 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 9 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index f929706757..d43480e89f 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -2296,7 +2296,7 @@ STAGE PLANS: keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1251 Data size: 83804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 83872 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col4 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col4)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), ((_col5 - ((_col6 * _col6) / _col7)) / _col7) 
(type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col8 (type: bigint), (_col8 - -89010L) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -2305,7 +2305,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 9, 10, 16, 4, 18, 19, 17, 14, 8, 20] selectExpressions: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 9:int, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 14:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 10:double) -> 11:double) -> 10:double, IfExprNullCondExpr(col 12:boolean, null, col 13:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 12:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 13:bigint) -> 14:bigint) -> 11:double) -> 10:double, DecimalScalarDivideDecimalColumn(val -1.389, col 15:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 15:decimal(5,0)) -> 16:decimal(10,9), DoubleColDivideDoubleColumn(col 11:double, col 17:double)(children: CastLongToDouble(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 11:double, CastLongToDouble(col 4:bigint) -> 17:double) -> 18:double, LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 19:int, DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 17:double)(children: DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 11:double) -> 17:double) -> 11:double) -> 17:double, LongColUnaryMinus(col 20:int)(children: LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: 
col 0:smallint) -> 14:int) -> 20:int) -> 14:int, LongColSubtractLongScalar(col 8:bigint, val -89010) -> 20:bigint - Statistics: Num rows: 1251 Data size: 218912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 219088 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) sort order: +++++++++++ @@ -2313,7 +2313,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1251 Data size: 218912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 219088 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -2331,7 +2331,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1251 Data size: 218912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 219088 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -2578,7 +2578,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1251 Data size: 57520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 57568 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE 
WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double), (2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), _col4 (type: bigint), ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D) (type: double), ((- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) * ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D)) (type: double), _col5 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (_col0 - (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END))) (type: double), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (_col0 + ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (_col0 * 762.0D) (type: double), _col2 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2587,7 +2587,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 7, 6, 11, 4, 17, 20, 5, 23, 26, 14, 29, 30, 2, 34] selectExpressions: DoubleColDivideLongColumn(col 6:double, col 10:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 7:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 7:double) -> 6:double, IfExprNullCondExpr(col 8:boolean, null, col 9:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 
9:bigint) -> 10:bigint) -> 7:double, DoubleScalarMultiplyDoubleColumn(val 2563.58, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 11:double) -> 6:double, IfExprNullCondExpr(col 10:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 10:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 11:double) -> 6:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 11:double) -> 14:double) -> 11:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 11:double, DoubleColAddDoubleScalar(col 14:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 17:double)(children: DoubleColDivideLongColumn(col 14:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 17:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 17:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 14:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 14:double, col 23:double)(children: DoubleColUnaryMinus(col 20:double)(children: 
DoubleColDivideLongColumn(col 14:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 20:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 20:double) -> 14:double, IfExprNullCondExpr(col 19:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 20:double) -> 14:double, DoubleColAddDoubleScalar(col 20:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 20:double) -> 23:double) -> 20:double, IfExprNullCondExpr(col 22:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 20:double) -> 23:double) -> 20:double, DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 23:double) -> 14:double) -> 23:double, DoubleColSubtractDoubleColumn(col 0:double, col 14:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 26:double) -> 14:double, IfExprNullCondExpr(col 25:boolean, null, col 
27:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 14:double) -> 26:double, FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 29:double) -> 14:double) -> 29:double) -> 14:double, DoubleColAddDoubleColumn(col 0:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 29:double) -> 30:double) -> 29:double, IfExprNullCondExpr(col 28:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 30:double) -> 29:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 30:double, DoubleScalarModuloDoubleColumn(val -863.257, col 33:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 33:double) -> 34:double - Statistics: Num rows: 1251 Data size: 157600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 157728 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -2595,7 +2595,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
Statistics: Num rows: 1251 Data size: 157600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 157728 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -2613,13 +2613,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13] - Statistics: Num rows: 1251 Data size: 157600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 157728 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1251 Data size: 157600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1252 Data size: 157728 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out index 32b0e3ec2a..ee794ad6f0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out @@ -306,13 +306,13 @@ Stage-0 File Output Operator [FS_300] Limit [LIM_299] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_298] (rows=38846 width=619) + Select Operator [SEL_298] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized SHUFFLE 
[RS_297] - Select Operator [SEL_296] (rows=38846 width=619) + Select Operator [SEL_296] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_295] (rows=38846 width=627) + Group By Operator [GBY_295] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized @@ -329,7 +329,7 @@ Stage-0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 - Group By Operator [GBY_44] (rows=46000 width=548) + Group By Operator [GBY_44] (rows=2835758 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 Merge Join Operator [MERGEJOIN_219] (rows=34813117 width=535) Conds:RS_40._col0=RS_305._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] @@ -398,7 +398,7 @@ Stage-0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_77] PartitionCols:_col0 - Group By Operator [GBY_76] (rows=84 width=548) + Group By Operator [GBY_76] (rows=3498 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 Merge Join Operator [MERGEJOIN_221] (rows=30966059 width=543) Conds:RS_72._col0=RS_312._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] @@ -478,7 +478,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 - Group By Operator [GBY_20] (rows=1704 width=548) + Group By Operator [GBY_20] (rows=78090 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 Merge Join Operator [MERGEJOIN_217] (rows=64325014 width=376) Conds:RS_16._col0=RS_289._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out 
b/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out index 184d281df0..2df373a2a9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query70.q.out @@ -103,22 +103,22 @@ Stage-0 File Output Operator [FS_168] Limit [LIM_167] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_166] (rows=240 width=492) + Select Operator [SEL_166] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] - Select Operator [SEL_164] (rows=240 width=492) + Select Operator [SEL_164] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_163] (rows=240 width=304) + PTF Operator [PTF_163] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition by:":"(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_162] (rows=240 width=304) + Select Operator [SEL_162] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_161] PartitionCols:(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_160] (rows=240 width=304) + Select Operator [SEL_160] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_159] (rows=240 width=304) + Group By Operator [GBY_159] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_49] @@ -148,7 +148,7 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 - Group By Operator [GBY_25] (rows=1704 width=198) + Group By Operator [GBY_25] (rows=2989 width=198) 
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 Merge Join Operator [MERGEJOIN_133] (rows=91197860 width=168) Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col5"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out index 78f7c28b7a..2e09c26c51 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out @@ -264,15 +264,15 @@ Stage-0 Stage-1 Reducer 7 vectorized File Output Operator [FS_271] - Limit [LIM_270] (rows=24 width=437) + Limit [LIM_270] (rows=58 width=437) Number of rows:100 - Select Operator [SEL_269] (rows=24 width=437) + Select Operator [SEL_269] (rows=58 width=437) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_268] - Select Operator [SEL_267] (rows=24 width=437) + Select Operator [SEL_267] (rows=58 width=437) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_266] (rows=24 width=445) + Group By Operator [GBY_266] (rows=58 width=445) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 5 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out index dbaecf8fe8..ad1d2a2ba6 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out @@ -258,13 +258,13 @@ Stage-0 File Output Operator [FS_452] Limit [LIM_451] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_450] (rows=38846 width=619) + Select Operator [SEL_450] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 9 
[SIMPLE_EDGE] vectorized SHUFFLE [RS_449] - Select Operator [SEL_448] (rows=38846 width=619) + Select Operator [SEL_448] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_447] (rows=38846 width=627) + Group By Operator [GBY_447] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 8 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized @@ -281,7 +281,7 @@ Stage-0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_71] PartitionCols:_col0 - Group By Operator [GBY_70] (rows=46000 width=436) + Group By Operator [GBY_70] (rows=427306 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_68] (rows=8592843 width=305) Output:["_col0","_col1","_col2","_col3"] @@ -512,7 +512,7 @@ Stage-0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=1704 width=436) + Group By Operator [GBY_33] (rows=4932 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_31] (rows=15038783 width=100) Output:["_col0","_col1","_col2","_col3"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out index fd709f99dd..4dc9cd8cc0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query95.q.out @@ -113,12 +113,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_267] Group By Operator [GBY_266] (rows=1 width=232) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_265] (rows=143895019 width=228) + Group By Operator [GBY_265] (rows=38111880083 width=228) 
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_101] PartitionCols:_col0 - Group By Operator [GBY_100] (rows=143895019 width=228) + Group By Operator [GBY_100] (rows=38111880083 width=228) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 Merge Join Operator [MERGEJOIN_227] (rows=83469759007 width=227) Conds:RS_47._col3=RS_48._col0(Inner),Output:["_col3","_col4","_col5"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query5.q.out b/ql/src/test/results/clientpositive/perf/tez/query5.q.out index 2ce689b1bb..35acfc82b3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query5.q.out @@ -306,13 +306,13 @@ Stage-0 File Output Operator [FS_304] Limit [LIM_303] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_302] (rows=38846 width=619) + Select Operator [SEL_302] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_301] - Select Operator [SEL_300] (rows=38846 width=619) + Select Operator [SEL_300] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_299] (rows=38846 width=627) + Group By Operator [GBY_299] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized @@ -329,7 +329,7 @@ Stage-0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_47] PartitionCols:_col0 - Group By Operator [GBY_46] (rows=46000 width=548) + Group By Operator [GBY_46] (rows=2835758 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 Merge Join Operator [MERGEJOIN_222] (rows=34813117 width=535) 
Conds:RS_42._col0=RS_310._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] @@ -400,7 +400,7 @@ Stage-0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_80] PartitionCols:_col0 - Group By Operator [GBY_79] (rows=84 width=548) + Group By Operator [GBY_79] (rows=3498 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 Merge Join Operator [MERGEJOIN_224] (rows=30966059 width=543) Conds:RS_75._col0=RS_318._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] @@ -482,7 +482,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 - Group By Operator [GBY_21] (rows=1704 width=548) + Group By Operator [GBY_21] (rows=78090 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 Merge Join Operator [MERGEJOIN_220] (rows=64325014 width=376) Conds:RS_17._col0=RS_293._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query70.q.out b/ql/src/test/results/clientpositive/perf/tez/query70.q.out index 4650ef66fc..6d2528e474 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query70.q.out @@ -103,22 +103,22 @@ Stage-0 File Output Operator [FS_168] Limit [LIM_167] (rows=100 width=492) Number of rows:100 - Select Operator [SEL_166] (rows=240 width=492) + Select Operator [SEL_166] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] - Select Operator [SEL_164] (rows=240 width=492) + Select Operator [SEL_164] (rows=720 width=492) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - PTF Operator [PTF_163] (rows=240 width=304) + PTF Operator [PTF_163] (rows=720 width=304) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 DESC NULLS LAST","partition 
by:":"(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END"}] - Select Operator [SEL_162] (rows=240 width=304) + Select Operator [SEL_162] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_161] PartitionCols:(grouping(_col3, 1) + grouping(_col3, 0)), CASE WHEN ((grouping(_col3, 0) = 0)) THEN (_col0) ELSE (CAST( null AS STRING)) END - Select Operator [SEL_160] (rows=240 width=304) + Select Operator [SEL_160] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_159] (rows=240 width=304) + Group By Operator [GBY_159] (rows=720 width=304) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_49] @@ -148,7 +148,7 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 - Group By Operator [GBY_25] (rows=1704 width=198) + Group By Operator [GBY_25] (rows=2989 width=198) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5 Merge Join Operator [MERGEJOIN_133] (rows=91197860 width=168) Conds:RS_21._col1=RS_151._col0(Inner),Output:["_col2","_col5"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/query80.q.out index 223e61aa24..b58acc015f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query80.q.out @@ -258,13 +258,13 @@ Stage-0 File Output Operator [FS_460] Limit [LIM_459] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_458] (rows=38846 width=619) + Select Operator [SEL_458] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_457] - Select Operator [SEL_456] (rows=38846 width=619) + Select Operator [SEL_456] (rows=59581 width=619) 
Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_455] (rows=38846 width=627) + Group By Operator [GBY_455] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 8 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized @@ -281,7 +281,7 @@ Stage-0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_75] PartitionCols:_col0 - Group By Operator [GBY_74] (rows=46000 width=436) + Group By Operator [GBY_74] (rows=427306 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_72] (rows=8592843 width=305) Output:["_col0","_col1","_col2","_col3"] @@ -520,7 +520,7 @@ Stage-0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col0 - Group By Operator [GBY_35] (rows=1704 width=436) + Group By Operator [GBY_35] (rows=4932 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_33] (rows=15038783 width=100) Output:["_col0","_col1","_col2","_col3"]