diff --git a/data/files/filterCard.txt b/data/files/filterCard.txt new file mode 100644 index 0000000000..6246bfb86e --- /dev/null +++ b/data/files/filterCard.txt @@ -0,0 +1,101 @@ +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +10,10,10 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 +20,20,20 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 423913b56b..4f7596a9f8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -280,7 +280,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // evaluate filter expression and update statistics long newNumRows = evaluateExpression(parentStats, pred, aspCtx, - neededCols, fop, 0); + neededCols, fop, parentStats.getNumRows()); Statistics st = parentStats.clone(); if (satisfyPrecondition(parentStats)) { @@ -318,13 +318,13 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, protected long evaluateExpression(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List neededCols, - Operator op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { + Operator op, long currNumRows) throws CloneNotSupportedException, SemanticException { long newNumRows = 0; Statistics andStats = null; - if (stats.getNumRows() <= 1 || stats.getDataSize() <= 0) { + if (currNumRows <= 1 || stats.getDataSize() <= 0) { if (LOG.isDebugEnabled()) { - LOG.debug("Estimating row count for " + pred + " Original num rows: " + stats.getNumRows() + + LOG.debug("Estimating row count for " + pred + " Original num rows: " + currNumRows + " Original data size: " + stats.getDataSize() + " New num rows: 1"); } return 1; @@ -340,41 +340,40 @@ protected long evaluateExpression(Statistics stats, ExprNodeDesc pred, aspCtx.setAndExprStats(andStats); // evaluate children + long evaluatedRowCount = currNumRows; for (ExprNodeDesc child : genFunc.getChildren()) { - newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, + evaluatedRowCount = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx, neededCols, op, evaluatedRowCount); - if (satisfyPrecondition(aspCtx.getAndExprStats())) { - updateStats(aspCtx.getAndExprStats(), newNumRows, true, op); - } else { - updateStats(aspCtx.getAndExprStats(), newNumRows, false, op); - } + } + newNumRows = evaluatedRowCount; + if (satisfyPrecondition(aspCtx.getAndExprStats())) { + 
updateStats(aspCtx.getAndExprStats(), newNumRows, true, op);
+      } else {
+        updateStats(aspCtx.getAndExprStats(), newNumRows, false, op);
       }
     } else if (udf instanceof GenericUDFOPOr) {
       // for OR condition independently compute and update stats.
       for (ExprNodeDesc child : genFunc.getChildren()) {
-        // early exit if OR evaluation yields more rows than input rows
-        if (evaluatedRowCount >= stats.getNumRows()) {
-          evaluatedRowCount = stats.getNumRows();
-        } else {
         newNumRows = StatsUtils.safeAdd(
-            evaluateChildExpr(stats, child, aspCtx, neededCols, op, evaluatedRowCount),
+            evaluateChildExpr(stats, child, aspCtx, neededCols, op, currNumRows),
             newNumRows);
-          evaluatedRowCount = newNumRows;
-        }
+      }
+      if (newNumRows > currNumRows) {
+        newNumRows = currNumRows;
       }
     } else if (udf instanceof GenericUDFIn) {
       // for IN clause
-      newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, op);
+      newNumRows = evaluateInExpr(stats, pred, currNumRows, aspCtx, neededCols, op);
     } else if (udf instanceof GenericUDFBetween) {
       // for BETWEEN clause
-      newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, op);
+      newNumRows = evaluateBetweenExpr(stats, pred, currNumRows, aspCtx, neededCols, op);
     } else if (udf instanceof GenericUDFOPNot) {
-      newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, op);
+      newNumRows = evaluateNotExpr(stats, pred, currNumRows, aspCtx, neededCols, op);
     } else if (udf instanceof GenericUDFOPNotNull) {
-      return evaluateNotNullExpr(stats, genFunc);
+      return evaluateNotNullExpr(stats, genFunc, currNumRows);
     } else {
       // single predicate condition
-      newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, evaluatedRowCount);
+      newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, currNumRows);
     }
   } else if (pred instanceof ExprNodeColumnDesc) {
@@ -413,10 +412,10 @@ protected long evaluateExpression(Statistics stats, ExprNodeDesc pred,
     return newNumRows;
   }
 
-  private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
+  private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx,
       List neededCols, Operator op) throws SemanticException {
 
-    long numRows = stats.getNumRows();
+    long numRows = currNumRows;
 
     ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
@@ -503,7 +502,7 @@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsPr
     return Math.round( (double) numRows * factor * inFactor);
   }
 
-  private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
+  private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx,
      List neededCols, Operator op) throws SemanticException, CloneNotSupportedException {
     final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
     final boolean invert = Boolean.TRUE.equals(
@@ -515,7 +514,7 @@ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateSt
     // Short circuit and return the current number of rows if this is a
     // synthetic predicate with dynamic values
     if (leftExpression instanceof ExprNodeDynamicValueDesc) {
-      return stats.getNumRows();
+      return currNumRows;
     }
 
     // We transform the BETWEEN clause to AND clause (with NOT on top if invert is true).
@@ -532,14 +531,14 @@ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateSt new GenericUDFOPNot(), Lists.newArrayList(newExpression)); } - return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, 0); + return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, currNumRows); } - private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, + private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long currNumRows, AnnotateStatsProcCtx aspCtx, List neededCols, Operator op) throws CloneNotSupportedException, SemanticException { - long numRows = stats.getNumRows(); + long numRows = currNumRows; // if the evaluate yields true then pass all rows else pass 0 rows if (pred instanceof ExprNodeGenericFuncDesc) { @@ -551,7 +550,7 @@ private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, long newNumRows = 0; for (ExprNodeDesc child : genFunc.getChildren()) { newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols, - op, 0); + op, numRows); } return numRows - newNumRows; } else if (leaf instanceof ExprNodeConstantDesc) { @@ -583,9 +582,9 @@ private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, return numRows / 2; } - private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred) { + private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred, long currNumRows) { - long numRows = stats.getNumRows(); + long numRows = currNumRows; if (pred instanceof ExprNodeGenericFuncDesc) { @@ -607,9 +606,9 @@ private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred) { return numRows / 2; } - private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred) { + private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred, long currNumRows) { long noOfNulls = getMaxNulls(parentStats, pred); - long parentCardinality = parentStats.getNumRows(); + long parentCardinality = currNumRows; long newPredCardinality = parentCardinality; if (parentCardinality > noOfNulls) { @@ -659,8 +658,8 @@ private long getMaxNulls(Statistics stats, ExprNodeDesc pred) { return maxNoNulls; } - private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc) { - long numRows = stats.getNumRows(); + private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc, long currNumRows) { + long numRows = currNumRows; GenericUDF udf = genFunc.getGenericUDF(); ExprNodeColumnDesc columnDesc; @@ -836,9 +835,9 @@ private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFun private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx, List neededCols, - Operator op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { + Operator op, long currNumRows) throws CloneNotSupportedException, SemanticException { - long numRows = stats.getNumRows(); + long numRows = currNumRows; if (child instanceof ExprNodeGenericFuncDesc) { @@ -915,15 +914,15 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, || udf instanceof GenericUDFOPEqualOrLessThan || udf instanceof GenericUDFOPGreaterThan || udf instanceof GenericUDFOPLessThan) { - return evaluateComparator(stats, genFunc); + return evaluateComparator(stats, genFunc, numRows); } else if (udf instanceof GenericUDFOPNotNull) { - return evaluateNotNullExpr(stats, genFunc); + return evaluateNotNullExpr(stats, genFunc, numRows); } else if (udf instanceof GenericUDFOPNull) { - return 
evaluateColEqualsNullExpr(stats, genFunc); + return evaluateColEqualsNullExpr(stats, genFunc, numRows); } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr || udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween || udf instanceof GenericUDFOPNot) { - return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, evaluatedRowCount); + return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, numRows); } else if (udf instanceof GenericUDFInBloomFilter) { if (genFunc.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) { // Synthetic predicates from semijoin opt should not affect stats. @@ -934,7 +933,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, if (Boolean.FALSE.equals(((ExprNodeConstantDesc) child).getValue())) { return 0; } else { - return stats.getNumRows(); + return numRows; } } @@ -1582,7 +1581,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // evaluate filter expression and update statistics try { newNumRows = evaluateExpression(stats, pred, - aspCtx, jop.getSchema().getColumnNames(), jop, 0); + aspCtx, jop.getSchema().getColumnNames(), jop, stats.getNumRows()); } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } @@ -1671,7 +1670,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // evaluate filter expression and update statistics try { newNumRows = evaluateExpression(wcStats, pred, - aspCtx, jop.getSchema().getColumnNames(), jop, 0); + aspCtx, jop.getSchema().getColumnNames(), jop, wcStats.getNumRows()); } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } diff --git a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out index 6e2975e671..da1d03c2d6 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -121,9 +121,9 @@ STAGE PLANS: Statistics: Num rows: 2098 Data size: 16744 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((t = 1) and (si = 2)) or ((t = 2) and (si = 3)) or ((t = 3) and (si = 4)) or ((t = 4) and (si = 5)) or ((t = 5) and (si = 6)) or ((t = 6) and (si = 7)) or ((t = 7) and (si = 8)) or ((t = 9) and (si = 10)) or ((t = 10) and (si = 11)) or ((t = 11) and (si = 12)) or ((t = 12) and (si = 13)) or ((t = 13) and (si = 14)) or ((t = 14) and (si = 15)) or ((t = 15) and (si = 16)) or ((t = 16) and (si = 17)) or ((t = 17) and (si = 18)) or ((t = 27) and (si = 28)) or ((t = 37) and (si = 38)) or ((t = 47) and (si = 48)) or ((t = 52) and (si = 53))) (type: boolean) - Statistics: Num rows: 160 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 160 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index e04c1c6bc5..d88819a1ec 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -577,16 +577,16 @@ 
STAGE PLANS: Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -595,10 +595,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 524 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 122 Data size: 488 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -707,16 +707,16 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -725,10 +725,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1015,16 +1015,16 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 7676 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_quantity > 10) and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 321 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 2284 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE @@ -1065,14 +1065,14 @@ STAGE PLANS: 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 321 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 297 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1102,16 +1102,16 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 7664 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ss_store_sk is not null and ss_addr_sk is not null) (type: boolean) - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_addr_sk (type: int), ss_store_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 916 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7004 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) TableScan alias: s @@ -1136,7 +1136,7 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col2 - Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7132 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1152,7 +1152,7 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 916 Data size: 7148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 914 Data size: 7132 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) TableScan alias: ca @@ -1177,14 +1177,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 241 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 243 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 48c02a24ad..03ebe375b8 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -388,25 +388,25 @@ Stage-0 <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=2 width=101) + Group By Operator [GBY_6] (rows=1 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=5 width=93) + Filter Operator [FIL_39] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 8 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_17] (rows=2 width=89) + Select Operator [SEL_17] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=2 width=93) + Group By Operator [GBY_16] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=2 width=93) + Group By Operator [GBY_14] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=5 width=93) + Filter Operator [FIL_40] (rows=4 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_11] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1088,25 +1088,25 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=40 width=101) + Select Operator [SEL_13] (rows=48 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_24] (rows=40 width=101) + Merge Join Operator [MERGEJOIN_24] (rows=48 width=101) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_21] (rows=9 width=93) + Filter Operator [FIL_21] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select 
Operator [SEL_5] (rows=9 width=89) + Select Operator [SEL_5] (rows=8 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=9 width=93) + Filter Operator [FIL_22] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1135,25 +1135,25 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=40 width=101) + Select Operator [SEL_13] (rows=48 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_24] (rows=40 width=101) + Merge Join Operator [MERGEJOIN_24] (rows=48 width=101) Conds:RS_9._col0=RS_10._col0(Inner),RS_9._col0=RS_11._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"],residual filter predicates:{((_col1 + _col4) = 2)} {((_col1 > 0) or (_col6 >= 0))} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_21] (rows=9 width=93) + Filter Operator [FIL_21] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 - Select Operator [SEL_5] (rows=9 width=89) + Select Operator [SEL_5] (rows=8 width=89) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=9 width=93) + Filter Operator [FIL_22] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1416,16 +1416,16 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_12] - Select Operator [SEL_11] (rows=9 width=4) + Select Operator [SEL_11] (rows=8 width=4) Output:["_col0"] - Merge Join Operator [MERGEJOIN_17] (rows=9 width=4) + Merge Join Operator [MERGEJOIN_17] (rows=8 width=4) Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_8] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=9 width=93) + Filter Operator [FIL_15] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1463,20 +1463,20 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_2] (rows=9 width=93) + Select Operator [SEL_2] (rows=8 width=93) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_25] (rows=9 width=93) + Filter Operator [FIL_25] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_0] (rows=20 width=88) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] llap SHUFFLE [RS_14] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=4 width=85) + Group By Operator [GBY_10] (rows=3 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=9 width=85) + Select Operator [SEL_5] (rows=8 width=85) Output:["_col0"] - Filter Operator [FIL_26] (rows=9 width=93) + Filter 
Operator [FIL_26] (rows=8 width=93) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0)) and key is not null) TableScan [TS_3] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out index c45210e562..8fc23c486f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out @@ -29,7 +29,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 5) -> boolean, SelectColumnIsNotNull(col 2) -> boolean, SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 8) -> boolean) -> boolean predicate: (cdouble is not null and cint is not null and cboolean1 is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2945 Data size: 152996 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -38,7 +38,7 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 2, 10, 8, 12, 13, 14, 15] selectExpressions: CastDoubleToDecimal(col 5) -> 12:decimal(20,10), CastLongToDecimal(col 2) -> 13:decimal(23,14), CastLongToDecimal(col 10) -> 14:decimal(5,2), CastTimestampToDecimal(col 8) -> 15:decimal(15,0) - Statistics: Num rows: 5112 Data size: 2410700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2945 Data size: 1388804 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out index 32d100180d..b1e0b14d4d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_if_expr.q.out @@ -34,7 +34,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsTrue(col 10) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: (cboolean1 and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 4587 Data size: 13704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 9052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), if(cboolean1, 'first', 'second') (type: string) outputColumnNames: _col0, _col1 @@ -43,7 +43,7 @@ STAGE PLANS: native: true projectedOutputColumns: [10, 12] selectExpressions: IfExprStringScalarStringScalar(col 10, val first, val second) -> 12:String - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -51,7 +51,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -80,13 +80,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3030 Data size: 566572 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index db76eaaf1b..2307843a2c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -30611,17 +30611,17 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -30841,17 +30841,17 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 6990 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out index f06c2dbcc6..1a0e846eb1 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 5461 Data size: 1107444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9557 Data size: 1937820 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -82,13 +82,13 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25] selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, 
DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double - Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9557 Data size: 1893568 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9557 Data size: 1893568 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out index 1c07962d83..a66ea36362 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -76,7 +76,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 4778 Data size: 640688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 549274 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -85,7 +85,7 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17] selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, 
LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -95,7 +95,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 21, 17] - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -136,13 +136,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13] - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out index 6c32ccf42d..9e13ea6099 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out @@ -82,7 +82,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 
988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 1342196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -91,7 +91,7 @@ STAGE PLANS: native: true projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -101,7 +101,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [] - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -142,7 +142,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column 
stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: @@ -331,7 +331,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean) - Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 1342196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -340,7 +340,7 @@ STAGE PLANS: native: true projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -348,7 +348,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -377,7 +377,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] - Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 66764cf42b..fe4a30fa9c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -622,7 +622,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11346 Data size: 2856120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double) outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble @@ -630,7 +630,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 5] - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11346 Data size: 2856120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) Group By Vectorization: @@ -3206,7 +3206,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) 
-> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) - Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7153 Data size: 1514550 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cboolean1 (type: boolean) outputColumnNames: ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cboolean1 @@ -3214,7 +3214,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 10] - Statistics: Num rows: 7845 Data size: 1661020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7153 Data size: 1514550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index 2a95065203..eb6eabea40 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -40,11 +40,11 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 585044 Data size: 7002120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: smallint), _col3 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 585044 Data size: 7002120 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -54,7 +54,7 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 1452263 Data size: 11604232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1284939 Data size: 10267240 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) mode: hash @@ -81,16 +81,16 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ctinyint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6059 Data size: 72396 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6059 Data size: 72396 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 6848 Data size: 81820 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6059 Data size: 72396 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: smallint), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out index 2904b68813..89047295ed 100644 --- a/ql/src/test/results/clientpositive/mapjoin47.q.out +++ b/ql/src/test/results/clientpositive/mapjoin47.q.out @@ -378,13 +378,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 or UDFToDouble(_col3) BETWEEN 100.0 AND 102.0)} - Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -488,13 +488,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0)} {((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0)} - Statistics: Num rows: 1388 Data size: 26738 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -592,13 +592,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {(((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0) or ((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0))} - Statistics: Num rows: 8332 Data size: 160507 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -698,13 +698,13 @@ STAGE PLANS: 1 
outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {(struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102))} - Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -817,17 +817,17 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) >= 100.0)} - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -952,13 +952,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)} - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1064,7 +1064,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0)} - Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1108,13 +1108,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - 
Statistics: Num rows: 4582 Data size: 88278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1130,7 +1130,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) TableScan alias: b @@ -1153,13 +1153,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4582 Data size: 88278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1282,13 +1282,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)} - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1393,12 +1393,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 residual filter predicates: {((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0)} - Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: 
string) Local Work: Map Reduce Local Work @@ -1410,13 +1410,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4582 Data size: 88278 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1556,13 +1556,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)} - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1731,17 +1731,17 @@ STAGE PLANS: 3 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 residual filter predicates: {((_col6 = _col24) or ((_col28 = _col16) and (_col8 = 42)))} {((_col12 = _col24) or ((_col27 = _col15) and (_col13 = _col25)))} {((_col0 = _col24) or ((_col1 = _col25) and (_col26 = _col20)))} - Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col18 (type: string), _col19 (type: string), _col20 (type: int), _col21 (type: float), _col22 (type: boolean), _col23 (type: string), _col24 (type: string), _col25 (type: string), _col26 (type: int), _col27 (type: float), _col28 (type: boolean), _col29 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: float), _col16 (type: boolean), _col17 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 - Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 53 Basic stats: COMPLETE 
Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 53 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 53 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/query23.q.out b/ql/src/test/results/clientpositive/perf/query23.q.out index dde707d0ea..5925869cbf 100644 --- a/ql/src/test/results/clientpositive/perf/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out index 074600ef02..55c14a7c44 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_47.q.out @@ -397,13 +397,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 16 Data size: 317 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -507,13 +507,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col0 + _col3) >= 100)} {((_col0 + _col3) <= 102)} - Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -605,13 +605,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(((_col0 + _col3) >= 100) or 
((_col0 + _col3) <= 102))} - Statistics: Num rows: 16 Data size: 317 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -711,13 +711,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(struct(_col0,_col3)) IN (const struct(100,100), const struct(101,101), const struct(102,102))} - Statistics: Num rows: 6 Data size: 119 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 6 Data size: 119 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 119 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1012,7 +1012,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col3 + _col0) >= 100)} - Statistics: Num rows: 8 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1056,13 +1056,13 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1078,7 +1078,7 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 8 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) TableScan alias: b @@ -1101,13 +1101,13 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1314,12 +1314,12 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col3 + _col0) >= 100)} - Statistics: Num rows: 8 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 8 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Local Work: Map Reduce Local Work @@ -1331,13 +1331,13 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1477,13 +1477,13 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 residual filter predicates: {((_col6 + _col0) <= 102)} - Statistics: Num rows: 12 Data size: 246 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out index 1f953575e3..ed0319da64 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> 
boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -82,13 +82,13 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25] selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 4e26314499..813bf7d883 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -92,7 +92,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10, col 11) -> boolean, FilterLongColNotEqualLongColumn(col 0, col 1)(children: col 0) -> boolean) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11, val 1) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 1)(children: col 1) -> boolean) -> boolean) -> boolean) -> boolean predicate: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean) - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) outputColumnNames: cbigint, cdouble, cstring1, cboolean1 @@ -100,7 +100,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3, 5, 6, 10] - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) Group By Vectorization: @@ -115,7 +115,7 @@ STAGE PLANS: keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) sort order: ++++ @@ -126,7 +126,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [4, 5, 6, 7, 8] - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) Execution mode: vectorized Map Vectorization: @@ -172,7 +172,7 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column 
stats: NONE Select Operator expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 @@ -181,7 +181,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8] selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 9:double, LongColUnaryMinus(col 1) -> 10:long, LongColMultiplyLongColumn(col 1, col 4) -> 11:long, DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14)(children: DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16)(children: CastLongToDecimal(col 1) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6, col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14)(children: DoubleColUnaryMinus(col 19)(children: DoubleColDivideDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20, col 21)(children: DoubleColDivideDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 21:double) -> 14:double - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) sort order: +++ @@ -191,7 +191,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8] - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 
(type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 Execution mode: vectorized @@ -218,13 +218,13 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18] selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out index b43c506c6f..e85c2ba3db 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -75,7 +75,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -84,7 +84,7 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17] selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 
16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -94,7 +94,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 21, 17] - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized Map Vectorization: @@ -134,13 +134,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13] - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_10.q.out b/ql/src/test/results/clientpositive/vectorization_10.q.out index d2d9bf644b..791414ff4f 100644 --- a/ql/src/test/results/clientpositive/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/vectorization_10.q.out @@ -70,7 +70,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean predicate: 
((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -79,13 +79,13 @@ STAGE PLANS: native: true projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25] selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9557 Data size: 2054789 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_12.q.out b/ql/src/test/results/clientpositive/vectorization_12.q.out index 63ea984a33..2f08aaa11b 100644 --- a/ql/src/test/results/clientpositive/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/vectorization_12.q.out @@ -87,7 +87,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10, col 11) -> boolean, FilterLongColNotEqualLongColumn(col 0, col 1)(children: col 0) -> boolean) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean, 
FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11, val 1) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 1)(children: col 1) -> boolean) -> boolean) -> boolean) -> boolean predicate: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean) - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) outputColumnNames: cbigint, cdouble, cstring1, cboolean1 @@ -95,7 +95,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3, 5, 6, 10] - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) Group By Vectorization: @@ -110,7 +110,7 @@ STAGE PLANS: keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) sort order: ++++ @@ -120,7 +120,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) Execution mode: vectorized Map Vectorization: @@ -152,11 +152,11 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -179,7 +179,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Execution mode: vectorized Map Vectorization: @@ -203,10 +203,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: string), null (type: timestamp), KEY.reducesinkkey0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(22,2)), VALUE._col11 (type: bigint), VALUE._col12 (type: double), VALUE._col8 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_17.q.out b/ql/src/test/results/clientpositive/vectorization_17.q.out index c0186082df..d60c28a3fc 100644 --- a/ql/src/test/results/clientpositive/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/vectorization_17.q.out @@ -70,7 +70,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> 
boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -79,7 +79,7 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17] selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -88,7 +88,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized Map Vectorization: @@ -113,10 +113,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), 
KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-          Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vectorization_7.q.out b/ql/src/test/results/clientpositive/vectorization_7.q.out
index 218d307915..afa9891a13 100644
--- a/ql/src/test/results/clientpositive/vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_7.q.out
@@ -76,7 +76,7 @@ STAGE PLANS:
                   native: true
                   predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
               predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean)
-              Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -85,7 +85,7 @@ STAGE PLANS:
                   native: true
                   projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
                   selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
-                Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
                   sort order: +++++++++++++++
@@ -94,7 +94,7 @@ STAGE PLANS:
                       native: false
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                  Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
       Execution mode: vectorized
       Map Vectorization:
@@ -119,7 +119,7 @@ STAGE PLANS:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-          Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 25
             Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE
@@ -296,7 +296,7 @@ STAGE PLANS:
                   native: true
                   predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
               predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean)
-              Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -305,7 +305,7 @@ STAGE PLANS:
                   native: true
                   projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
                   selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
-                Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
                   sort order: +++++++++++++++
@@ -314,7 +314,7 @@ STAGE PLANS:
                       native: false
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                       nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                  Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.1
       Execution mode: vectorized
       Map Vectorization:
@@ -333,7 +333,7 @@ STAGE PLANS:
         Select Operator
          expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-          Statistics: Num rows: 7281 Data size: 1565441 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
           Limit
             Number of rows: 25
             Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE