diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index bf6583e..2bc765c 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1611,6 +1611,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
     HIVESAMPLINGNUMBERFORORDERBY("hive.optimize.sampling.orderby.number", 1000, "Total number of samples to be obtained."),
     HIVESAMPLINGPERCENTFORORDERBY("hive.optimize.sampling.orderby.percent", 0.1f, new RatioValidator(),
         "Probability with which a row will be chosen."),
+    HIVE_REMOVE_ORDERBY_IN_SUBQUERY("hive.remove.orderby.in.subquery", true,
+        "If set to true, ORDER/SORT BY without LIMIT in subqueries and views will be removed."),
     HIVEOPTIMIZEDISTINCTREWRITE("hive.optimize.distinct.rewrite", true, "When applicable this "
         + "optimization rewrites distinct aggregates from a single stage to multi-stage "
         + "aggregation. This may not be optimal in all cases. Ideally, whether to trigger it or "
diff --git a/contrib/src/test/results/clientpositive/udf_row_sequence.q.out b/contrib/src/test/results/clientpositive/udf_row_sequence.q.out
index 9715c75..094a71a 100644
--- a/contrib/src/test/results/clientpositive/udf_row_sequence.q.out
+++ b/contrib/src/test/results/clientpositive/udf_row_sequence.q.out
@@ -39,11 +39,11 @@ STAGE PLANS:
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string)
-              outputColumnNames: _col0
+              expressions: key (type: string), row_sequence() (type: bigint)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: row_sequence() (type: bigint)
+                key expressions: _col1 (type: bigint)
                 sort order: +
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col0 (type: string)
diff --git a/data/scripts/input20_script.py b/data/scripts/input20_script.py
index 40e3683..223fa2b 100644
--- a/data/scripts/input20_script.py
+++ b/data/scripts/input20_script.py
@@ -18,13 +18,11 @@
 #
 import sys
 import re
-line = sys.stdin.readline()
-x = 1
-while line:
-  tem = sys.stdin.readline()
-  if line == tem:
-    x = x + 1
-  else:
-    print str(x).strip()+'\t'+re.sub('\t','_',line.strip())
-    line = tem
-    x = 1
\ No newline at end of file
+# Count occurrences of each input line, then emit "<count>\t<line>"
+# (tabs within the line are replaced with '_').
+counts = {}
+for line in sys.stdin.readlines():
+  counts[line] = counts.get(line, 0) + 1
+for key in counts:
+  x = counts[key]
+  print str(x).strip()+'\t'+re.sub('\t','_',key.strip())
\ No newline at end of file
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 0aadee3..295fe0c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -907,7 +907,6 @@ spark.query.files=add_part_multiple.q, \
   decimal_1_1.q, \
   decimal_join.q, \
   disable_merge_for_bucketing.q, \
-  dynamic_rdd_cache.q, \
   enforce_order.q, \
   escape_clusterby1.q, \
   escape_distributeby1.q, \
@@ -1387,6 +1386,7 @@ spark.query.files=add_part_multiple.q, \
 spark.only.query.files=spark_combine_equivalent_work.q,\
   spark_dynamic_partition_pruning.q,\
   spark_dynamic_partition_pruning_2.q,\
+  dynamic_rdd_cache.q,\
   spark_multi_insert_parallel_orderby.q,\
   spark_explainuser_1.q,\
   spark_vectorized_dynamic_partition_pruning.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 699fcb4..01d19f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -11256,6 +11256,57 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException
     return genPlan(qb);
   }
 
+  private void removeOBInSubQuery(QBExpr qbExpr) {
+    if (qbExpr == null) {
+      return;
+    }
+
+    if (qbExpr.getOpcode() == QBExpr.Opcode.NULLOP) {
+      QB subQB = qbExpr.getQB();
+      QBParseInfo parseInfo = subQB.getParseInfo();
+      String alias = qbExpr.getAlias();
+      Map<String, ASTNode> destToOrderBy = parseInfo.getDestToOrderBy();
+      Map<String, ASTNode> destToSortBy = parseInfo.getDestToSortBy();
+      final String warning = "WARNING: Order/Sort by without limit in sub query or view ["
+          + alias + "] is removed, as it's pointless and bad for performance.";
+      // Remove entries through an explicit iterator; removing from the map while
+      // iterating over its keySet() would throw ConcurrentModificationException.
+      if (destToOrderBy != null) {
+        Iterator<Map.Entry<String, ASTNode>> it = destToOrderBy.entrySet().iterator();
+        while (it.hasNext()) {
+          Map.Entry<String, ASTNode> entry = it.next();
+          if (parseInfo.getDestLimit(entry.getKey()) == null) {
+            removeASTChild(entry.getValue());
+            it.remove();
+            console.printInfo(warning);
+          }
+        }
+      }
+      if (destToSortBy != null) {
+        Iterator<Map.Entry<String, ASTNode>> it = destToSortBy.entrySet().iterator();
+        while (it.hasNext()) {
+          Map.Entry<String, ASTNode> entry = it.next();
+          if (parseInfo.getDestLimit(entry.getKey()) == null) {
+            removeASTChild(entry.getValue());
+            it.remove();
+            console.printInfo(warning);
+          }
+        }
+      }
+    } else {
+      removeOBInSubQuery(qbExpr.getQBExpr1());
+      removeOBInSubQuery(qbExpr.getQBExpr2());
+    }
+  }
+
+  private static void removeASTChild(ASTNode node) {
+    Tree parent = node.getParent();
+    if (parent != null) {
+      parent.deleteChild(node.getChildIndex());
+      node.setParent(null);
+    }
+  }
+
   void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
     // 1. Generate Resolved Parse tree from syntax tree
     LOG.info("Starting Semantic Analysis");
@@ -11265,6 +11316,12 @@ void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException
       return;
     }
 
+    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY)) {
+      for (String alias : qb.getSubqAliases()) {
+        removeOBInSubQuery(qb.getSubqForAlias(alias));
+      }
+    }
+
    // 2. Gen OP Tree from resolved Parse Tree
    Operator sinkOp = genOPTree(ast, plannerCtx);
diff --git a/ql/src/test/queries/clientpositive/concat_op.q b/ql/src/test/queries/clientpositive/concat_op.q
index 8bbbb92..3fa0ce4 100644
--- a/ql/src/test/queries/clientpositive/concat_op.q
+++ b/ql/src/test/queries/clientpositive/concat_op.q
@@ -24,16 +24,16 @@ create table ct2 (c int);
 insert into ct1 values (7),(5),(3),(1);
 insert into ct2 values (8),(6),(4),(2);
 
-create view ct_v1 as select * from ct1 union all select * from ct2 order by c;
+create view ct_v1 as select * from ct1 union all select * from ct2;
 
-select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1;
+select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c;
 
 select *, 'x' || (c&3) , 'a' || c*c+c || 'b' from ct_v1 order by 'a' || c*c+c || 'b';
 
-select 'x' || (c&3),collect_list(c) from ct_v1
-	group by 'x' || (c&3);
+select 'x' || (c&3) from ct_v1
+	group by 'x' || (c&3) order by 'x' || (c&3);
 
 explain select concat('a','b','c');
 explain select 'a' || 'b' || 'c';
diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer14.q b/ql/src/test/queries/clientpositive/correlationoptimizer14.q
index 5547f25..7f191d2 100644
--- a/ql/src/test/queries/clientpositive/correlationoptimizer14.q
+++ b/ql/src/test/queries/clientpositive/correlationoptimizer14.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.reducededuplication=true;
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.optimize.correlation=true;
+set hive.remove.orderby.in.subquery=false;
 -- This file is used to show plans of queries involving cluster by, distribute by,
 -- order by, and sort by.
 -- Right now, Correlation optimizer check the most restrictive condition
diff --git a/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q b/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
index a012ae2..6a44dd1 100644
--- a/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
+++ b/ql/src/test/queries/clientpositive/groupby_distinct_samekey.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 -- This test covers HIVE-2332
+-- SORT_QUERY_RESULTS
 
 create table t1 (int1 int, int2 int, str1 string, str2 string);
diff --git a/ql/src/test/queries/clientpositive/input20.q b/ql/src/test/queries/clientpositive/input20.q
index ff430ab..3c2f78f 100644
--- a/ql/src/test/queries/clientpositive/input20.q
+++ b/ql/src/test/queries/clientpositive/input20.q
@@ -7,8 +7,7 @@ FROM (
   FROM src
   MAP src.key, src.key
   USING 'cat'
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -19,12 +18,11 @@ FROM (
   FROM src
   MAP src.key, src.key
   USING 'cat'
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
 USING 'python input20_script.py'
 AS key, value;
 
-SELECT * FROM dest1 SORT BY key, value;
+SELECT * FROM dest1 ORDER BY key, value;
diff --git a/ql/src/test/queries/clientpositive/input33.q b/ql/src/test/queries/clientpositive/input33.q
index 8b6b215..3309045 100644
--- a/ql/src/test/queries/clientpositive/input33.q
+++ b/ql/src/test/queries/clientpositive/input33.q
@@ -7,8 +7,7 @@ FROM (
   FROM src
   MAP src.key, src.key
   USING 'cat'
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -19,12 +18,11 @@ FROM (
   FROM src
   MAP src.key, src.key
   USING 'cat'
-  DISTRIBUTE BY key
-  SORT BY key, value
+  DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
 USING 'python input20_script.py'
 AS (key STRING, value STRING);
 
-SELECT * FROM dest1 SORT BY key, value;
+SELECT * FROM dest1 ORDER BY key, value;
diff --git a/ql/src/test/queries/clientpositive/input3_limit.q b/ql/src/test/queries/clientpositive/input3_limit.q
index f983aca..3e9af60 100644
--- a/ql/src/test/queries/clientpositive/input3_limit.q
+++ b/ql/src/test/queries/clientpositive/input3_limit.q
@@ -7,11 +7,11 @@ LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T1;
 
 CREATE TABLE T2(key STRING, value STRING);
 
 EXPLAIN
-INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20;
+INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20;
 
-INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20;
+INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20;
 
-SELECT * FROM T2 SORT BY key, value;
+SELECT * FROM T2 ORDER BY key, value;
diff --git a/ql/src/test/queries/clientpositive/ppd2.q b/ql/src/test/queries/clientpositive/ppd2.q
index b955652..75eb6a8 100644
--- a/ql/src/test/queries/clientpositive/ppd2.q
+++ b/ql/src/test/queries/clientpositive/ppd2.q
@@ -2,6 +2,8 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.ppd=true;
 set hive.ppd.remove.duplicatefilters=true;
 
+-- SORT_QUERY_RESULTS
+
 explain
 select b.key,b.cc
 from (
diff --git a/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
index 798dddc..8c9ff66 100644
--- a/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
+++ b/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
@@ -2,6 +2,7 @@ set hive.mapred.mode=nonstrict;
 set hive.optimize.reducededuplication=true;
 set hive.optimize.reducededuplication.min.reducer=1;
 set hive.map.aggr=true;
+set hive.remove.orderby.in.subquery=false;
 
 -- HIVE-2340 deduplicate RS followed by RS
 -- hive.optimize.reducededuplication : wherther using this optimization
diff --git a/ql/src/test/queries/clientpositive/truncate_column_buckets.q b/ql/src/test/queries/clientpositive/truncate_column_buckets.q
index 1cda1bf..c51a98f 100644
--- a/ql/src/test/queries/clientpositive/truncate_column_buckets.q
+++ b/ql/src/test/queries/clientpositive/truncate_column_buckets.q
@@ -11,7 +11,7 @@ INSERT OVERWRITE TABLE test_tab SELECT * FROM src;
 SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM
 test_tab GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a;
+)a ORDER BY file_name DESC;
 
 -- Truncate a column on which the table is not bucketed
 TRUNCATE TABLE test_tab COLUMNS (value);
@@ -21,4 +21,4 @@
 SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM
 test_tab GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a;
+)a ORDER BY file_name DESC;
diff --git a/ql/src/test/results/clientpositive/auto_join0.q.out b/ql/src/test/results/clientpositive/auto_join0.q.out
index 77940b3..d15196e 100644
--- a/ql/src/test/results/clientpositive/auto_join0.q.out
+++ b/ql/src/test/results/clientpositive/auto_join0.q.out
@@ -1,5 +1,5 @@
-Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product
-Warning: Map Join MAPJOIN[32][bigTable=?]
in task 'Stage-6:MAPRED' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-6:MAPRED' is a cross product Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) @@ -25,16 +25,15 @@ SELECT src1.key as k1, src1.value as v1, POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 + Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-8 + Stage-3 depends on stages: Stage-2, Stage-5, Stage-6 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 - Stage-4 depends on stages: Stage-3 - Stage-10 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-10 Stage-2 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -66,10 +65,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: a:$INTNAME1 @@ -83,7 +82,7 @@ STAGE PLANS: 0 1 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -95,12 +94,17 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -109,31 +113,6 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) @@ -151,7 +130,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-10 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: a:$INTNAME @@ -165,7 +144,7 @@ STAGE PLANS: 0 1 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -177,12 +156,17 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -208,14 +192,19 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -250,8 +239,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-7:MAPRED' is a cross product -Warning: Map Join MAPJOIN[32][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-5:MAPRED' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] 
in task 'Stage-6:MAPRED' is a cross product Warning: Shuffle Join JOIN[12][tables = [src1, src2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( diff --git a/ql/src/test/results/clientpositive/auto_join15.q.out b/ql/src/test/results/clientpositive/auto_join15.q.out index 18f9b6a..fc4eb74 100644 --- a/ql/src/test/results/clientpositive/auto_join15.q.out +++ b/ql/src/test/results/clientpositive/auto_join15.q.out @@ -15,13 +15,12 @@ SORT BY k1, v1, k2, v2 ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: a:src1 @@ -61,38 +60,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git a/ql/src/test/results/clientpositive/auto_join20.q.out b/ql/src/test/results/clientpositive/auto_join20.q.out index 9d97fe5..8aa2f9a 100644 --- a/ql/src/test/results/clientpositive/auto_join20.q.out +++ b/ql/src/test/results/clientpositive/auto_join20.q.out @@ -15,13 +15,12 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> 
Map Local Tables: a:src1 @@ -88,38 +87,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -175,13 +154,12 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: a:src1 @@ -248,38 +226,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git a/ql/src/test/results/clientpositive/auto_join31.q.out b/ql/src/test/results/clientpositive/auto_join31.q.out index 1cf86cf..90aecae 100644 --- a/ql/src/test/results/clientpositive/auto_join31.q.out +++ b/ql/src/test/results/clientpositive/auto_join31.q.out @@ -21,224 +21,35 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-8 depends on stages: Stage-1, Stage-4, Stage-5 , consists of Stage-9, Stage-10, Stage-2 - Stage-9 has a backup stage: Stage-2 - Stage-6 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 - Stage-10 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-10 - Stage-2 - Stage-4 is a root stage - Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - $INTNAME2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - $INTNAME2 
- TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: - $INTNAME + x:src Fetch Operator limit: -1 - $INTNAME1 + y:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + 
x:src TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + y:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -246,49 +57,56 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out index 942e447..3878bd3 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -22,13 +22,12 @@ SELECT cbo_t1.key as k1, cbo_t1.value as v1, ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: a:cbo_t1:cbo_t3 @@ -76,38 +75,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -131,7 +110,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -155,13 +134,12 @@ SELECT cbo_t1.key as k1, cbo_t1.value as v1, ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: a:cbo_t1:cbo_t3 @@ -209,38 +187,18 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 36 Data size: 12240 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git a/ql/src/test/results/clientpositive/concat_op.q.out b/ql/src/test/results/clientpositive/concat_op.q.out index e7fad1f..17a0e31 100644 --- a/ql/src/test/results/clientpositive/concat_op.q.out +++ b/ql/src/test/results/clientpositive/concat_op.q.out @@ -160,26 +160,26 @@ POSTHOOK: query: insert into ct2 values 
(8),(6),(4),(2) POSTHOOK: type: QUERY POSTHOOK: Output: default@ct2 POSTHOOK: Lineage: ct2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c +PREHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 PREHOOK: type: CREATEVIEW PREHOOK: Input: default@ct1 PREHOOK: Input: default@ct2 PREHOOK: Output: database:default PREHOOK: Output: default@ct_v1 -POSTHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 order by c +POSTHOOK: query: create view ct_v1 as select * from ct1 union all select * from ct2 POSTHOOK: type: CREATEVIEW POSTHOOK: Input: default@ct1 POSTHOOK: Input: default@ct2 POSTHOOK: Output: database:default POSTHOOK: Output: default@ct_v1 POSTHOOK: Lineage: ct_v1.c EXPRESSION [(ct1)ct1.FieldSchema(name:c, type:int, comment:null), (ct2)ct2.FieldSchema(name:c, type:int, comment:null), ] -PREHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 +PREHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c PREHOOK: type: QUERY PREHOOK: Input: default@ct1 PREHOOK: Input: default@ct2 PREHOOK: Input: default@ct_v1 #### A masked pattern was here #### -POSTHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 +POSTHOOK: query: select c,c * c + c || 'x', 'c+c=' || c+c || ', c*c=' || c*c || ', (c&c)=' || (c & c) from ct_v1 order by c POSTHOOK: type: QUERY POSTHOOK: Input: default@ct1 POSTHOOK: Input: default@ct2 @@ -215,24 +215,24 @@ POSTHOOK: Input: default@ct_v1 7 x3 a56b 2 x2 a6b 8 x0 a72b -PREHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1 - group by 'x' || (c&3) +PREHOOK: query: select 'x' || (c&3) from ct_v1 + group by 'x' || (c&3) order by 'x' || (c&3) PREHOOK: type: QUERY PREHOOK: Input: default@ct1 PREHOOK: Input: default@ct2 PREHOOK: Input: default@ct_v1 #### A masked pattern was here #### -POSTHOOK: query: select 'x' || (c&3),collect_list(c) from ct_v1 - group by 'x' || (c&3) +POSTHOOK: query: select 'x' || (c&3) from ct_v1 + group by 'x' || (c&3) order by 'x' || (c&3) POSTHOOK: type: QUERY POSTHOOK: Input: default@ct1 POSTHOOK: Input: default@ct2 POSTHOOK: Input: default@ct_v1 #### A masked pattern was here #### -x0 [4,8] -x1 [1,5] -x2 [2,6] -x3 [3,7] +x0 +x1 +x2 +x3 PREHOOK: query: explain select concat('a','b','c') PREHOOK: type: QUERY POSTHOOK: query: explain select concat('a','b','c') diff --git a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out deleted file mode 100644 index 873a41d..0000000 --- a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ /dev/null @@ -1,1431 +0,0 @@ -PREHOOK: query: EXPLAIN -FROM -(SELECT src.* FROM src sort by key) X -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (X.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (X.key = Z.key) -SELECT sum(hash(Y.key,Y.value)) GROUP BY Y.key -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM -(SELECT src.* FROM src sort by key) X -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (X.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (X.key = Z.key) -SELECT sum(hash(Y.key,Y.value)) GROUP BY Y.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends 
on stages: Stage-1, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-2 - Stage-4 is a root stage - Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: true - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest1 -POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest1 -PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@dest2 -POSTHOOK: query: CREATE TABLE dest2(key INT, value STRING) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@dest2 -PREHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -INSERT OVERWRITE TABLE dest2 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN -FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -INSERT OVERWRITE TABLE dest2 SELECT src.key, sum(SUBSTR(src.value,5)) GROUP BY src.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-0 depends on 
stages: Stage-2
- Stage-3 depends on stages: Stage-0
- Stage-4 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(substr(value, 5))
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double)
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: key, value
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(substr(value, 5))
- keys: key (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: true
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
-
- Stage: Stage-3
- Stats-Aggr Operator
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double)
- Reduce Operator Tree:
- Group By Operator
- aggregations: sum(VALUE._col0)
- keys: KEY._col0 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: true
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest2
-
- Stage: Stage-1
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest2
-
- Stage: Stage-5
- Stats-Aggr Operator
-
-PREHOOK: query: SELECT dest1.* FROM dest1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@dest1
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT dest1.* FROM dest1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@dest1
-#### A masked pattern was here ####
-PREHOOK: query: SELECT dest2.* FROM dest2
-PREHOOK: type: QUERY
-PREHOOK: Input: default@dest2
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT dest2.* FROM dest2
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@dest2
-#### A masked pattern was here ####
-PREHOOK: query: DROP TABLE dest1
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@dest1
-PREHOOK: Output: default@dest1
-POSTHOOK: query: DROP TABLE dest1
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@dest1
-POSTHOOK: Output: default@dest1
-PREHOOK: query: DROP TABLE dest2
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@dest2
-PREHOOK: Output: default@dest2
-POSTHOOK: query: DROP TABLE dest2
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@dest2
-POSTHOOK: Output: default@dest2
-PREHOOK: query: CREATE TABLE tmptable(key STRING, value INT)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@tmptable
-POSTHOOK: query: CREATE TABLE tmptable(key STRING, value INT)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@tmptable
-PREHOOK: query: EXPLAIN
-INSERT OVERWRITE TABLE tmptable
- SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, count(1) AS value FROM src s1
- UNION ALL
- SELECT 'tst2' AS key, count(1) AS value FROM src s2
- UNION ALL
- SELECT 'tst3' AS key, count(1) AS value FROM src s3) unionsrc
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-INSERT OVERWRITE TABLE tmptable
- SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, count(1) AS value FROM src s1
- UNION ALL
- SELECT 'tst2' AS key, count(1) AS value FROM src s2
- UNION ALL
- SELECT 'tst3' AS key, count(1) AS value FROM src s3) unionsrc
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-9, Stage-10
- Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
- Stage-5
- Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
- Stage-3 depends on stages: Stage-0
- Stage-4
- Stage-6
- Stage-7 depends on stages: Stage-6
- Stage-9 is a root stage
- Stage-10 is a root stage
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'tst1' (type: string), _col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Union
- Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmptable
- TableScan
- Union
- Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmptable
- TableScan
- Union
- Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmptable
-
- Stage: Stage-8
- Conditional Operator
-
- Stage: Stage-5
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-0
- Move Operator
- tables:
- replace: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmptable
-
- Stage: Stage-3
- Stats-Aggr Operator
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmptable
-
- Stage: Stage-6
- Map Reduce
- Map Operator Tree:
- TableScan
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.tmptable
-
- Stage: Stage-7
- Move Operator
- files:
- hdfs directory: true
-#### A masked pattern was here ####
-
- Stage: Stage-9
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: s2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'tst2' (type: string), _col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-10
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: s3
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'tst3' (type: string), _col0 (type: bigint)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@tmptable
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM tmptable x SORT BY x.key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@tmptable
-#### A masked pattern was here ####
-PREHOOK: query: DROP TABLE tmtable
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: DROP TABLE tmtable
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: EXPLAIN
-SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value
-FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1
- UNION ALL
- SELECT s2.key AS key, s2.value AS value FROM src s2 WHERE s2.key < 10) unionsrc1
-JOIN
- (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s3
- UNION ALL
- SELECT s4.key AS key, s4.value AS value FROM src s4 WHERE s4.key < 10) unionsrc2
-ON (unionsrc1.key = unionsrc2.key)
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value
-FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1
- UNION ALL
- SELECT s2.key AS key, s2.value AS value FROM src s2 WHERE s2.key < 10) unionsrc1
-JOIN
- (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s3
- UNION ALL
- SELECT s4.key AS key, s4.value AS value FROM src s4 WHERE s4.key < 10) unionsrc2
-ON (unionsrc1.key = unionsrc2.key)
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1, Stage-4
- Stage-4 is a root stage
- Stage-0 depends on stages: Stage-2
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: s1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'tst1' (type: string), UDFToString(_col0) (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Union
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: string)
- TableScan
- alias: s2
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (UDFToDouble(key) < 10.0) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Union
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: string)
- TableScan
- Union
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: string)
- TableScan
- alias: s4
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (UDFToDouble(key) < 10.0) (type: boolean)
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: string), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
- Union
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col1 (type: string)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 27889 Data size: 15171616 Basic stats: COMPLETE Column stats: PARTIAL
- File Output Operator
- compressed: true
- Statistics: Num rows: 27889 Data size: 15171616 Basic stats: COMPLETE Column stats: PARTIAL
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: s3
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'tst1' (type: string), UDFToString(_col0) (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: CREATE TABLE inv(w_warehouse_name STRING , w_warehouse_sk INT , stdev INT , d_moy INT , mean INT , cov INT , inv_quantity_on_hand INT)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@inv
-POSTHOOK: query: CREATE TABLE inv(w_warehouse_name STRING , w_warehouse_sk INT , stdev INT , d_moy INT , mean INT , cov INT , inv_quantity_on_hand INT)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@inv
-PREHOOK: query: CREATE TABLE inventory(inv_date_sk INT , inv_item_sk INT ,inv_quantity_on_hand INT ,inv_warehouse_sk INT)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@inventory
-POSTHOOK: query: CREATE TABLE inventory(inv_date_sk INT , inv_item_sk INT ,inv_quantity_on_hand INT ,inv_warehouse_sk INT)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@inventory
-PREHOOK: query: CREATE TABLE item(i_item_sk INT)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@item
-POSTHOOK: query: CREATE TABLE item(i_item_sk INT)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@item
-PREHOOK: query: CREATE TABLE warehouse(w_warehouse_sk INT , w_warehouse_name STRING)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@warehouse
-POSTHOOK: query: CREATE TABLE warehouse(w_warehouse_sk INT , w_warehouse_name STRING)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@warehouse
-PREHOOK: query: CREATE TABLE date_dim(d_date_sk INT , d_year INT , d_moy INT)
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@date_dim
-POSTHOOK: query: CREATE TABLE date_dim(d_date_sk INT , d_year INT , d_moy INT)
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@date_dim
-PREHOOK: query: EXPLAIN
-WITH inv AS
-(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
- ,stdev,mean, CASE mean WHEN 0 THEN null ELSE stdev/mean END cov
-FROM(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
- ,STDDEV_SAMP(inv_quantity_on_hand) stdev,AVG(inv_quantity_on_hand) mean
- FROM inventory
- ,item
- ,warehouse
- ,date_dim
- WHERE inv_item_sk = i_item_sk
- AND inv_warehouse_sk = w_warehouse_sk
- AND inv_date_sk = d_date_sk
- AND d_year =1999
- GROUP BY w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
- WHERE CASE mean WHEN 0 THEN 0 ELSE stdev/mean END > 1)
-SELECT inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
- ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
-FROM inv inv1,inv inv2
-WHERE inv1.i_item_sk = inv2.i_item_sk
- AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
- AND inv1.d_moy=3
- AND inv2.d_moy=3+1
-ORDER BY inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
- ,inv2.d_moy,inv2.mean, inv2.cov
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-WITH inv AS
-(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
- ,stdev,mean, CASE mean WHEN 0 THEN null ELSE stdev/mean END cov
-FROM(SELECT w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy
- ,STDDEV_SAMP(inv_quantity_on_hand) stdev,AVG(inv_quantity_on_hand) mean
- FROM inventory
- ,item
- ,warehouse
- ,date_dim
- WHERE inv_item_sk = i_item_sk
- AND inv_warehouse_sk = w_warehouse_sk
- AND inv_date_sk = d_date_sk
- AND d_year =1999
- GROUP BY w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo
- WHERE CASE mean WHEN 0 THEN 0 ELSE stdev/mean END > 1)
-SELECT inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov
- ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov
-FROM inv inv1,inv inv2
-WHERE inv1.i_item_sk = inv2.i_item_sk
- AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
- AND inv1.d_moy=3
- AND inv2.d_moy=3+1
-ORDER BY inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov
- ,inv2.d_moy,inv2.mean, inv2.cov
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-3 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-3
- Stage-5 depends on stages: Stage-4, Stage-13
- Stage-6 depends on stages: Stage-5
- Stage-10 is a root stage
- Stage-11 depends on stages: Stage-10
- Stage-12 depends on stages: Stage-11
- Stage-13 depends on stages: Stage-12
- Stage-0 depends on stages: Stage-6
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: inventory
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int), inv_warehouse_sk (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
- TableScan
- alias: item
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: i_item_sk is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: i_item_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col3 (type: int)
- sort order: +
- Map-reduce partition columns: _col3 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int)
- TableScan
- alias: warehouse
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: w_warehouse_sk is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: string)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col3 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: string)
- TableScan
- alias: date_dim
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: ((d_year = 1999) and (d_moy = 3) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: d_date_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: stddev_samp(_col2), avg(_col2)
- keys: _col5 (type: int), _col4 (type: int), _col6 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
- sort order: +++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: struct)
- Reduce Operator Tree:
- Group By Operator
- aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1)
- keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double)
- outputColumnNames: _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: CASE WHEN ((_col4 = 0.0)) THEN (false) ELSE (((_col3 / _col4) > 1.0)) END (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0)) THEN (null) ELSE ((_col3 / _col4)) END (type: double)
- outputColumnNames: _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col1 (type: int), _col2 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col3 (type: double), _col4 (type: double)
- TableScan
- Reduce Output Operator
- key expressions: _col1 (type: int), _col2 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: int), _col2 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col3 (type: double), _col4 (type: double)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int), _col2 (type: int)
- 1 _col1 (type: int), _col2 (type: int)
- outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: double), _col6 (type: int), _col7 (type: int), _col8 (type: double), _col9 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-6
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: double), _col6 (type: double), _col7 (type: double)
- sort order: ++++++
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col4 (type: int), _col5 (type: int)
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), 3 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: double), VALUE._col0 (type: int), VALUE._col1 (type: int), 4 (type: int), KEY.reducesinkkey4 (type: double), KEY.reducesinkkey5 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-10
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: inventory
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (inv_item_sk is not null and inv_warehouse_sk is not null and inv_date_sk is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int), inv_warehouse_sk (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
- TableScan
- alias: item
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: i_item_sk is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: i_item_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-11
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col3 (type: int)
- sort order: +
- Map-reduce partition columns: _col3 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int)
- TableScan
- alias: warehouse
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: w_warehouse_sk is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: string)
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col3 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col2, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-12
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: string)
- TableScan
- alias: date_dim
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: ((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: d_date_sk (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col2, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Group By Operator
- aggregations: stddev_samp(_col2), avg(_col2)
- keys: _col5 (type: int), _col4 (type: int), _col6 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-13
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
- sort order: +++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: struct)
- Reduce Operator Tree:
- Group By Operator
- aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1)
- keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double)
- outputColumnNames: _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: CASE WHEN ((_col4 = 0.0)) THEN (false) ELSE (((_col3 / _col4) > 1.0)) END (type: boolean)
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0)) THEN (null) ELSE ((_col3 / _col4)) END (type: double)
- outputColumnNames: _col1, _col2, _col3, _col4
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: EXPLAIN
-WITH test AS
-(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory
- UNION ALL
- SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory)
-SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
-WITH test AS
-(SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory
- UNION ALL
- SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM inventory)
-SELECT inv_date_sk , inv_item_sk ,inv_quantity_on_hand FROM test SORT BY inv_quantity_on_hand
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: inventory
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Union
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int)
- TableScan
- alias: inventory
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Union
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: int)
- sort order: +
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col0 (type: int), _col1 (type: int)
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey0 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: true
- Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: DROP TABLE inv
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@inv
-PREHOOK: Output: default@inv
-POSTHOOK: query: DROP TABLE inv
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@inv
-POSTHOOK: Output: default@inv
-PREHOOK: query: DROP TABLE inventory
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@inventory
-PREHOOK: Output: default@inventory
-POSTHOOK: query: DROP TABLE inventory
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@inventory
-POSTHOOK: Output: default@inventory
-PREHOOK: query: DROP TABLE item
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@item
-PREHOOK: Output: default@item
-POSTHOOK: query: DROP TABLE item
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@item
-POSTHOOK: Output: default@item
-PREHOOK: query: DROP TABLE warehouse
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@warehouse
-PREHOOK: Output: default@warehouse
-POSTHOOK: query: DROP TABLE warehouse
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@warehouse
-POSTHOOK: Output: default@warehouse
-PREHOOK: query: DROP TABLE date_dim
-PREHOOK: type: DROPTABLE
-PREHOOK: Input: default@date_dim
-PREHOOK: Output: default@date_dim
-POSTHOOK: query: DROP TABLE date_dim
-POSTHOOK: type: DROPTABLE
-POSTHOOK: Input: default@date_dim
-POSTHOOK: Output: default@date_dim
diff --git a/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out b/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
index a721b7f..4c5b0f8 100644
--- a/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
+++ b/ql/src/test/results/clientpositive/groupby_distinct_samekey.q.out
@@ -24,8 +24,7 @@ POSTHOOK: query: explain select Q1.int1, sum(distinct Q1.int1) from (select * fr
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-0 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-1

 STAGE PLANS:
 Stage: Stage-1
@@ -36,39 +35,19 @@ STAGE PLANS:
 Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: int1 (type: int)
- outputColumnNames: _col0
+ outputColumnNames: int1
 Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
+ Group By Operator
+ aggregations: sum(DISTINCT int1)
+ keys: int1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
 Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(DISTINCT _col0)
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 8 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 Reduce Operator Tree:
 Group By Operator
 aggregations: sum(DISTINCT KEY._col1:0._col0)
diff --git a/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out b/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out
index 7ec14e8..eaa48a0 100644
--- a/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out
+++ b/ql/src/test/results/clientpositive/identity_project_remove_skip.q.out
@@ -17,157 +17,31 @@ from
 where t2.value='val_105' and t3.key='105'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-6 depends on stages: Stage-1, Stage-3 , consists of Stage-7, Stage-8, Stage-2
- Stage-7 has a backup stage: Stage-2
- Stage-4 depends on stages: Stage-7
- Stage-8 has a backup stage: Stage-2
- Stage-5 depends on stages: Stage-8
- Stage-2
- Stage-3 is a root stage
- Stage-0 depends on stages: Stage-4, Stage-5, Stage-2
+ Stage-4 is a root stage
+ Stage-3 depends on stages: Stage-4
+ Stage-0 depends on stages: Stage-3

 STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((value = 'val_105') and (key = '105')) (type: boolean)
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: '105' (type: string)
- sort order: +
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Select Operator
- Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $INTNAME1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $INTNAME1
- TableScan
- HashTable Sink Operator
- keys:
- 0 '105' (type: string)
- 1 '105' (type: string)
- Stage: Stage-4
- Map Reduce
- Map Operator Tree:
- TableScan
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 '105' (type: string)
- 1 '105' (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '105' (type: string), 'val_105' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-8
 Map Reduce Local Work
 Alias -> Map Local Tables:
- $INTNAME
+ t2:t1:src
 Fetch Operator
 limit: -1
 Alias -> Map Local Operator Tree:
- $INTNAME
+ t2:t1:src
 TableScan
- HashTable Sink Operator
- keys:
- 0 '105' (type: string)
- 1 '105' (type: string)
-
- Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 '105' (type: string)
- 1 '105' (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '105' (type: string), 'val_105' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Local Work:
- Map Reduce Local Work
-
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: '105' (type: string)
- sort order: +
- Map-reduce partition columns: '105' (type: string)
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((value = 'val_105') and (key = '105')) (type: boolean)
 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
- TableScan
- Reduce Output Operator
- key expressions: '105' (type: string)
- sort order: +
- Map-reduce partition columns: '105' (type: string)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: '105' (type: string), 'val_105' (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Select Operator
+ Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ keys:
+ 0 '105' (type: string)
+ 1 '105' (type: string)

 Stage: Stage-3
 Map Reduce
@@ -180,19 +54,26 @@ STAGE PLANS:
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: '105' (type: string)
- sort order: +
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Reduce Operator Tree:
- Select Operator
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 '105' (type: string)
+ 1 '105' (type: string)
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: '105' (type: string), 'val_105' (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Local Work:
+ Map Reduce Local Work

 Stage: Stage-0
 Fetch Operator
diff --git a/ql/src/test/results/clientpositive/input20.q.out b/ql/src/test/results/clientpositive/input20.q.out
index cf0ee1d..9587445 100644
--- a/ql/src/test/results/clientpositive/input20.q.out
+++ b/ql/src/test/results/clientpositive/input20.q.out
@@ -11,8 +11,7 @@ FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -24,8 +23,7 @@ FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -56,9 +54,8 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
+ sort order:
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: string), _col1 (type: string)
 Reduce Operator Tree:
@@ -103,8 +100,7 @@ PREHOOK: query: FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -117,8 +113,7 @@ POSTHOOK: query: FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -129,11 +124,11 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
 POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: SELECT * FROM dest1 SORT BY key, value
+PREHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM dest1 SORT BY key, value
+POSTHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/input33.q.out b/ql/src/test/results/clientpositive/input33.q.out
index b35e2d0..4be5fc5 100644
--- a/ql/src/test/results/clientpositive/input33.q.out
+++ b/ql/src/test/results/clientpositive/input33.q.out
@@ -11,8 +11,7 @@ FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -24,8 +23,7 @@ FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -56,9 +54,8 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
- Map-reduce partition columns: _col0 (type: string)
+ sort order:
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: string), _col1 (type: string)
 Reduce Operator Tree:
@@ -103,8 +100,7 @@ PREHOOK: query: FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -117,8 +113,7 @@ POSTHOOK: query: FROM (
 FROM src
 MAP src.key, src.key
 USING 'cat'
- DISTRIBUTE BY key
- SORT BY key, value
+ DISTRIBUTE BY key, value
 ) tmap
 INSERT OVERWRITE TABLE dest1
 REDUCE tmap.key, tmap.value
@@ -129,11 +124,11 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest1
 POSTHOOK: Lineage: dest1.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest1.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: SELECT * FROM dest1 SORT BY key, value
+PREHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM dest1 SORT BY key, value
+POSTHOOK: query: SELECT * FROM dest1 ORDER BY key, value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/input3_limit.q.out b/ql/src/test/results/clientpositive/input3_limit.q.out
index c7bc4ac..560de96 100644
--- a/ql/src/test/results/clientpositive/input3_limit.q.out
+++ b/ql/src/test/results/clientpositive/input3_limit.q.out
@@ -31,10 +31,10 @@ POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@T2
 PREHOOK: query: EXPLAIN
-INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20
+INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN
-INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20
+INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
@@ -54,40 +54,36 @@ STAGE PLANS:
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- sort order: ++
+ sort order:
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
+ value expressions: _col0 (type: string), _col1 (type: string)
 Reduce Operator Tree:
 Select Operator
- expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
+ expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

 Stage: Stage-2
 Map Reduce
 Map Operator Tree:
 TableScan
 Reduce Output Operator
- sort order:
- Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
- value expressions: _col0 (type: string), _col1 (type: string)
 Reduce Operator Tree:
 Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE
 Limit
 Number of rows: 20
 Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE
@@ -113,21 +109,21 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator

-PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20
+PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Output: default@t2
-POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20
+POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key) T ORDER BY key, value LIMIT 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Output: default@t2
 POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
 POSTHOOK: Lineage: t2.value SIMPLE [(t1)t1.FieldSchema(name:value, type:string, comment:null), ]
-PREHOOK: query: SELECT * FROM T2 SORT BY key, value
+PREHOOK: query: SELECT * FROM T2 ORDER BY key, value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t2
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM T2 SORT BY key, value
+POSTHOOK: query: SELECT * FROM T2 ORDER BY key, value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out
index 35e6827..eeb47f7 100644
--- a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out
+++ b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out
@@ -145,63 +145,53 @@ STAGE PLANS:
 expressions: key (type: string), value (type: string)
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
+ Lateral View Forward
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Lateral View Forward
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Lateral View Join Operator
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
 Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Select Operator
- expressions: array(_col1,_col1) (type: array)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- UDTF Operator
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- function name: explode
- Lateral View Join Operator
- outputColumnNames: _col0, _col2
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col2
 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows:
10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_join0.q.out b/ql/src/test/results/clientpositive/llap/auto_join0.q.out index 1141db2..7f0a878 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join0.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 2' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -30,10 +30,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,29 +72,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Reducer 5 + 1 Reducer 4 Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 9809936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -110,7 +98,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -128,7 +116,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Reducer 2' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git a/ql/src/test/results/clientpositive/llap/auto_join30.q.out b/ql/src/test/results/clientpositive/llap/auto_join30.q.out index b1cb5fa..91a8012 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join30.q.out @@ -23,9 +23,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -43,10 +42,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -58,52 +58,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 
(type: bigint) - Reducer 5 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -169,9 +145,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -183,13 +158,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -199,41 +189,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -246,19 +211,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE 
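-- The auto_join0/auto_join30 plans shrink for the same reason: each derived
-- table sorted its join input with an unlimited ORDER BY/SORT BY, and that sort
-- stage is now planned away, so one reducer disappears and every later operator
-- id shifts down (hence MAPJOIN[24] becoming MAPJOIN[22] in the warning).
-- Paraphrasing the auto_join0 shape from the output above:
EXPLAIN
SELECT SUM(HASH(a.k1, a.v1, a.k2, a.v2))
FROM (SELECT src1.key AS k1, src1.value AS v1, src2.key AS k2, src2.value AS v2
      FROM (SELECT * FROM src WHERE key < 10) src1
      JOIN (SELECT * FROM src WHERE key < 10) src2   -- no ON clause: the cross product in the warning
      SORT BY k1, v1, k2, v2) a;                     -- this SORT BY no longer yields a shuffle stage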
- Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -310,9 +262,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -327,10 +278,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -339,52 +291,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -456,10 +384,8 
@@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -477,10 +403,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -492,60 +419,50 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 
(type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - 2 Reducer 6 - Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 5 + LLAP IO: no inputs + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -560,18 +477,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -635,11 +540,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -654,10 +556,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -667,32 +570,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 
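-- Same pattern in the multi-way variants: the broadcast sides of the map join
-- used to be fed by dedicated sort reducers; with the inner sorts gone they are
-- read directly off the Map vertices, so the edges collapse to Map-to-Map
-- broadcasts plus a single aggregation reducer. A paraphrase of the shape
-- (hint and aliases assumed, not the verbatim auto_join30 query):
EXPLAIN
SELECT /*+ MAPJOIN(x, z) */ SUM(HASH(y.key, y.value))
FROM (SELECT key FROM src SORT BY key) x
JOIN (SELECT key, value FROM src SORT BY key) y ON x.key = y.key
JOIN (SELECT key FROM src SORT BY key) z ON y.key = z.key;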
Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -712,7 +615,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -727,31 +630,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -815,11 +693,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -834,10 +709,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -847,32 +723,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 
43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -892,7 +768,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -907,31 +783,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -995,11 +846,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1014,10 +862,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map 
Operator Tree: TableScan alias: src @@ -1027,36 +876,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Left Outer Join 0 to 1 @@ -1076,7 +921,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1091,31 +936,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column 
stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1179,11 +999,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1198,10 +1015,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -1211,32 +1029,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Merge Join Operator condition map: Right Outer Join 0 to 1 @@ -1256,7 +1074,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1271,31 +1089,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key 
expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index e78ceb3..84b140a 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -209,7 +209,7 @@ POSTHOOK: query: drop table src_orc_merge_test_part POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@src_orc_merge_test_part POSTHOOK: Output: default@src_orc_merge_test_part -Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( select src1.key as k1, src1.value as v1, @@ -233,42 +233,37 @@ POSTHOOK: type: QUERY Plan not optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 4 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap - File Output Operator [FS_17] - Group By Operator [GBY_15] (rows=1 width=8) + Reducer 2 llap + File Output Operator [FS_15] + Group By Operator [GBY_13] (rows=1 width=8) Output:["_col0"],aggregations:["sum(VALUE._col0)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_14] - Group By Operator [GBY_13] (rows=1 width=8) + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_12] + Group By Operator [GBY_11] (rows=1 width=8) Output:["_col0"],aggregations:["sum(hash(_col0,_col1,_col2,_col3))"] - Select Operator [SEL_11] (rows=27556 width=356) - Output:["_col0","_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - Map Join Operator [MAPJOIN_20] (rows=27556 width=356) - Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_7] - Select Operator [SEL_5] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_19] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_3] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_18] (rows=166 width=178) - predicate:(key < 10) - TableScan [TS_0] (rows=500 width=178) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Map Join Operator [MAPJOIN_18] (rows=27556 width=356) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3"] + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_7] + Select Operator [SEL_5] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_17] (rows=166 width=178) + predicate:(key < 10) + TableScan [TS_3] (rows=500 width=178) + 
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_2] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_16] (rows=166 width=178) + predicate:(key < 10) + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select key, (c_int+1)+2 as x, sum(c_int) from cbo_t1 group by c_float, cbo_t1.c_int, key PREHOOK: type: QUERY @@ -345,86 +340,76 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_37] - Select Operator [SEL_35] (rows=1 width=20) + File Output Operator [FS_31] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - Select Operator [SEL_33] (rows=1 width=28) + SHUFFLE [RS_28] + Select Operator [SEL_27] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_32] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_30] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1 - Select Operator [SEL_29] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_47] (rows=1 width=20) - Conds:RS_25._col0=RS_26._col0(Inner),RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_44] (rows=18 width=84) + Filter Operator [FIL_38] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=1 width=89) - Output:["_col0","_col1"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Select Operator [SEL_20] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_19] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator 
[FIL_46] (rows=5 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) - TableScan [TS_14] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) + Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=2 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_45] (rows=5 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=2 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=5 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=1 width=89) + Output:["_col0","_col1"] + Group By Operator [GBY_16] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_14] (rows=2 width=93) + Output:["_col0","_col1","_col2"],keys:key, c_int, c_float + Filter Operator [FIL_40] (rows=5 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and key is not null) + TableScan [TS_11] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b % c asc, b desc) cbo_t1 left outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p left outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int % c asc, cbo_t3.c_int desc PREHOOK: type: QUERY @@ -433,80 +418,75 @@ POSTHOOK: type: QUERY 
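-- In the explainuser_1 CBO plans, whole reducer chains drop out (e.g. Reducer
-- 9/10 and 6/7 pairs becoming single reducers): the "order by" clauses attached
-- to the cbo_t1/cbo_t2 derived tables carry no LIMIT, so their sort-and-reselect
-- steps vanish and the surviving operators renumber downward. For instance,
-- a derived table like the following is planned as if the ORDER BY were absent:
EXPLAIN
SELECT t.a, t.c
FROM (SELECT key AS a, SUM(c_int) AS c
      FROM cbo_t1 GROUP BY key, c_int, c_float
      ORDER BY c) t;   -- unlimited ORDER BY in a derived table: removed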
Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_34] - Select Operator [SEL_32] (rows=1 width=20) + File Output Operator [FS_31] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] - Select Operator [SEL_30] (rows=1 width=28) + SHUFFLE [RS_28] + Select Operator [SEL_27] (rows=1 width=28) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_29] (rows=1 width=20) + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_27] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_26] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_44] (rows=1 width=20) - Conds:RS_22._col0=RS_23._col0(Inner),RS_23._col0=RS_24._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_41] (rows=18 width=84) + Filter Operator [FIL_38] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) + Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_42] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - 
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=2 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=89) + Select Operator [SEL_17] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_19] (rows=1 width=93) + Group By Operator [GBY_16] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=1 width=93) + Group By Operator [GBY_14] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_43] (rows=2 width=93) + Filter Operator [FIL_40] (rows=2 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_14] (rows=20 width=88) + TableScan [TS_11] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by b+c, a desc) cbo_t1 right outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p right outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 2) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c @@ -516,73 +496,68 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
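-- The rewrite is gated by the new flag, so the old plan shapes remain
-- reproducible; a hypothetical session to compare both:
SET hive.remove.orderby.in.subquery=true;
EXPLAIN SELECT * FROM (SELECT key FROM src ORDER BY key) t;   -- sort stage removed, info message printed
SET hive.remove.orderby.in.subquery=false;
EXPLAIN SELECT * FROM (SELECT key FROM src ORDER BY key) t;   -- sort stage planned as before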
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_31] - Group By Operator [GBY_29] (rows=1 width=20) + File Output Operator [FS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_27] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_26] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_41] (rows=1 width=20) - Conds:RS_22._col0=RS_23._col0(Inner),RS_23._col0=RS_24._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)} + Merge Join Operator [MERGEJOIN_38] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 2)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_38] (rows=18 width=84) + Filter Operator [FIL_35] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) + Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_36] (rows=2 width=93) + 
predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=89) + Select Operator [SEL_17] (rows=1 width=89) Output:["_col0","_col1"] - Group By Operator [GBY_19] (rows=1 width=93) + Group By Operator [GBY_16] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=1 width=93) + Group By Operator [GBY_14] (rows=1 width=93) Output:["_col0","_col1","_col2"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=2 width=93) + Filter Operator [FIL_37] (rows=2 width=93) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_14] (rows=20 width=88) + TableScan [TS_11] (rows=20 width=88) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by c+a desc) cbo_t1 full outer join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0 order by p+q desc, r asc) cbo_t2 on cbo_t1.a=p full outer join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c having cbo_t3.c_int > 0 and (c_int >=1 or c >= 1) and (c_int + c) >= 0 order by cbo_t3.c_int @@ -592,84 +567,74 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_36] - Select Operator [SEL_35] (rows=1 width=20) + File Output Operator [FS_30] + Select Operator [SEL_29] (rows=1 width=20) Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - Group By Operator [GBY_32] (rows=1 width=20) + SHUFFLE [RS_28] + Group By Operator [GBY_26] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + SHUFFLE [RS_25] PartitionCols:_col0, _col1 - Group By Operator [GBY_30] (rows=1 width=20) + Group By Operator [GBY_24] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_29] (rows=1 width=20) + Select Operator [SEL_23] (rows=1 width=20) Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_46] (rows=1 width=20) - Conds:RS_25._col0=RS_26._col0(Inner),RS_26._col0=RS_27._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Merge Join Operator [MERGEJOIN_40] (rows=1 width=20) + Conds:RS_19._col0=RS_20._col0(Inner),RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col1 >= 1) or (_col4 >= 1))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_25] + SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_2] (rows=18 width=84) Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=18 width=84) + Filter Operator [FIL_37] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=1 width=89) - Output:["_col0","_col1"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - Select Operator [SEL_20] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_19] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_45] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_14] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_26] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_20] PartitionCols:_col0 - Select Operator [SEL_12] (rows=1 width=97) 
+ Select Operator [SEL_9] (rows=1 width=97) Output:["_col0","_col1","_col2"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - Select Operator [SEL_9] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col5"] - Group By Operator [GBY_8] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_6] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_44] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_3] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_8] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_7] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_6] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_38] (rows=2 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_3] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=1 width=89) + Output:["_col0","_col1"] + Group By Operator [GBY_16] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_14] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=2 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) + TableScan [TS_11] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: type: QUERY @@ -1535,91 +1500,81 @@ Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 8 <- Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 llap - File Output Operator [FS_41] - Select Operator [SEL_40] (rows=1 width=101) + Reducer 5 llap + File Output Operator [FS_35] + Select Operator [SEL_34] (rows=1 width=101) Output:["_col0","_col1","_col2"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] - Select Operator [SEL_38] (rows=1 width=101) + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_33] + Select Operator [SEL_32] (rows=1 width=101) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_37] (rows=1 width=101) + Group By Operator [GBY_31] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_36] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=1 width=101) + Group By Operator [GBY_29] (rows=1 width=101) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Merge Join Operator [MERGEJOIN_51] (rows=1 width=93) - Conds:RS_30._col0=RS_31._col0(Left Semi),RS_30._col0=RS_32._col0(Left Semi),Output:["_col0","_col1"] - <-Map 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + Merge Join Operator [MERGEJOIN_45] (rows=1 width=93) + Conds:RS_24._col0=RS_25._col0(Left Semi),RS_24._col0=RS_26._col0(Left Semi),Output:["_col0","_col1"] + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_26] PartitionCols:_col0 - Group By Operator [GBY_29] (rows=6 width=85) + Group By Operator [GBY_23] (rows=3 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_25] (rows=18 width=80) + Select Operator [SEL_19] (rows=6 width=85) Output:["_col0"] - Filter Operator [FIL_50] (rows=18 width=80) - predicate:key is not null - TableScan [TS_23] (rows=20 width=80) + Filter Operator [FIL_44] (rows=6 width=85) + predicate:(UDFToDouble(key) > 0.0) + TableScan [TS_17] (rows=20 width=80) default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_30] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_24] PartitionCols:_col0 - Select Operator [SEL_10] (rows=1 width=93) + Select Operator [SEL_8] (rows=1 width=93) Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - Select Operator [SEL_8] (rows=1 width=109) - Output:["_col0","_col1","_col4"] - Filter Operator [FIL_7] (rows=1 width=101) - predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0)) - Select Operator [SEL_6] (rows=1 width=101) - Output:["_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_48] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) 
and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] + Filter Operator [FIL_7] (rows=1 width=101) + predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0)) + Select Operator [SEL_6] (rows=1 width=101) + Output:["_col2","_col3"] + Group By Operator [GBY_5] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_42] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0)) + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_25] PartitionCols:_col0 - Group By Operator [GBY_27] (rows=1 width=85) + Group By Operator [GBY_21] (rows=1 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_21] (rows=1 width=85) + Select Operator [SEL_15] (rows=1 width=85) Output:["_col0"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] - Select Operator [SEL_18] (rows=1 width=93) - Output:["_col0","_col3"] - Group By Operator [GBY_17] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_16] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_15] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_49] (rows=2 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and key is not null) - TableScan [TS_12] (rows=20 width=88) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_14] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_13] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_12] (rows=1 width=93) + Output:["_col0","_col1","_col2"],keys:key, c_int, c_float + Filter Operator [FIL_43] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0)) and (c_float > 0) and ((c_int >= 1) or (c_float >= 1)) and ((UDFToFloat(c_int) + c_float) >= 0) and (UDFToDouble(key) > 0.0)) + TableScan [TS_9] (rows=20 width=88) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out b/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out index 
3375fa8..b03b96b 100644 --- a/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out +++ b/ql/src/test/results/clientpositive/llap/identity_project_remove_skip.q.out @@ -25,8 +25,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -42,10 +41,11 @@ STAGE PLANS: Reduce Output Operator key expressions: '105' (type: string) sort order: + + Map-reduce partition columns: '105' (type: string) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -55,47 +55,28 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '105' (type: string) + 1 '105' (type: string) + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: '105' (type: string), 'val_105' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 2 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '105' (type: string) - 1 '105' (type: string) - input vertices: - 1 Reducer 4 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: '105' (type: string), 'val_105' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Map-reduce partition columns: '105' (type: string) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/mrr.q.out b/ql/src/test/results/clientpositive/llap/mrr.q.out index 7866e37..fe477fd 100644 --- a/ql/src/test/results/clientpositive/llap/mrr.q.out +++ 
b/ql/src/test/results/clientpositive/llap/mrr.q.out @@ -1293,13 +1293,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) Reducer 6 <- Map 1 (SIMPLE_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 8 <- Map 1 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1346,24 +1343,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1385,7 +1370,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 68 Data size: 19380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1399,7 +1384,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1412,24 +1397,12 @@ STAGE PLANS: predicate: (_col1 > 1) (type: boolean) Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: bigint) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 68 Data size: 6460 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 8 + value expressions: _col1 (type: bigint) + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1439,18 +1412,6 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 205 Data size: 
19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out b/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out index d951797..b0eff1e 100644 --- a/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_join_tests.q.out @@ -13,10 +13,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -46,7 +45,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: a @@ -74,24 +73,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -107,7 +94,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out b/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out index bbde077..418c23c 100644 --- a/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_joins_explain.q.out @@ -13,10 +13,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- 
Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -46,7 +45,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: a @@ -74,24 +73,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 60 Data size: 10680 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -107,7 +94,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out b/ql/src/test/results/clientpositive/llap/vector_join30.q.out index 381f13e..6b5e604 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -39,9 +39,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -67,14 +66,41 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinInnerStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -82,10 +108,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan alias: orcsrc @@ -109,14 +135,15 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -134,59 +161,6 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinInnerStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE - Group By 
Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: @@ -213,34 +187,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -296,9 +242,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -317,14 +262,41 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 
(type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -332,10 +304,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan alias: orcsrc @@ -352,14 +324,15 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -377,59 +350,6 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: 
KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinOuterStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: @@ -456,34 +376,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -539,9 +431,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -563,8 +454,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -578,7 +470,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 3 + Map 2 Map Operator Tree: TableScan alias: orcsrc @@ -594,15 +486,41 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOuterStringOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -610,90 +528,10 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: true vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Map Join Vectorization: - className: VectorMapJoinOuterStringOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 550 Data size: 96800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -787,10 +625,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 6 <- Map 4 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -816,14 +652,88 @@ STAGE PLANS: native: true projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + Group By Vectorization: + aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Map 3 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: 
string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -850,31 +760,22 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1] + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -892,63 +793,6 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Map Join Vectorization: - className: 
VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 6 - Statistics: Num rows: 1100 Data size: 193600 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - Group By Vectorization: - aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - vectorOutput: true - native: false - vectorProcessingMode: HASH - projectedOutputColumns: [0] - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: @@ -975,61 +819,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - 
usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1097,11 +886,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1123,8 +909,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -1138,7 +925,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -1155,23 +942,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1183,33 +997,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1237,7 +1024,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1270,61 +1057,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1392,11 +1124,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1418,8 +1147,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -1433,7 +1163,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -1450,23 +1180,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: 
VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1478,33 +1235,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1532,7 +1262,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 
Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1565,61 +1295,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1687,11 +1362,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1713,8 +1385,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -1728,7 +1401,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -1745,31 +1418,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1] + projectedOutputColumns: [0] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1781,33 +1473,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 
- Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1835,7 +1500,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -1868,61 +1533,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1990,11 +1600,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2016,8 +1623,9 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE @@ -2031,7 +1639,7 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan alias: orcsrc @@ -2048,23 +1656,50 @@ STAGE PLANS: projectedOutputColumns: [0, 1] Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: string) sort order: + + Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkStringOperator native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2076,33 +1711,6 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2130,7 +1738,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -2163,61 +1771,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1, 0] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reducer 7 - Execution mode: vectorized, llap - 
Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [1] - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out b/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index 33cc940..a172fd2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -118,7 +118,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -130,23 +130,35 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator - expressions: t (type: tinyint), si (type: smallint), i (type: int), (t < 0) (type: boolean), (si <= 0) (type: boolean), (i = 0) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: hash(t,si,i,(t < 0),(si <= 0),(i = 0)) (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1, 2, 7, 8, 9] - selectExpressions: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long + projectedOutputColumns: [10] + selectExpressions: VectorUDFAdaptor(hash(t,si,i,(t < 0),(si <= 0),(i = 0)))(children: LongColLessLongScalar(col 0, val 0) -> 7:long, LongColLessEqualLongScalar(col 1, val 0) -> 8:long, LongColEqualLongScalar(col 2, val 0) -> 9:long) -> 10:int Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) - sort order: +++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 10) -> 
bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -154,8 +166,8 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -164,41 +176,32 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: - Select Operator - expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0] + mode: mergepartial outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [6] - selectExpressions: VectorUDFAdaptor(hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2)) -> 6:int - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 6) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - vectorOutput: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator native: false - vectorProcessingMode: GLOBAL - projectedOutputColumns: [0] - mode: complete - outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch 
Operator @@ -244,7 +247,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -256,23 +259,35 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] Select Operator - expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), (t > 0) (type: boolean), (si >= 0) (type: boolean), (i <> 0) (type: boolean), (b > 0) (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: hash(t,si,i,b,(t > 0),(si >= 0),(i <> 0),(b > 0)) (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1, 2, 3, 7, 8, 9, 10] - selectExpressions: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long + projectedOutputColumns: [11] + selectExpressions: VectorUDFAdaptor(hash(t,si,i,b,(t > 0),(si >= 0),(i <> 0),(b > 0)))(children: LongColGreaterLongScalar(col 0, val 0) -> 7:long, LongColGreaterEqualLongScalar(col 1, val 0) -> 8:long, LongColNotEqualLongScalar(col 2, val 0) -> 9:long, LongColGreaterLongScalar(col 3, val 0) -> 10:long) -> 11:int Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint) - sort order: ++++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 11) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -280,8 +295,8 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false + allNative: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap @@ -290,41 +305,32 @@ STAGE PLANS: 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true groupByVectorOutput: true allNative: false - usesVectorUDFAdaptor: true + usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: - Select Operator - expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int) + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0] + mode: mergepartial outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [8] - selectExpressions: VectorUDFAdaptor(hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3)) -> 8:int - Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8) -> bigint - className: VectorGroupByOperator - groupByMode: COMPLETE - vectorOutput: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator native: false - vectorProcessingMode: GLOBAL - projectedOutputColumns: [0] - mode: complete - outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out index 476dfa7..d1da4e7 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -30,11 +30,10 @@ INSERT OVERWRITE TABLE e2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -44,35 +43,14 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1 + expressions: UDFToDouble(key) (type: double) + outputColumnNames: _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator - key expressions: _col0 (type: string) - sort order: + + sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: double) - outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -116,7 +94,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 @@ -129,7 +107,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 32aec10..610b15f 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -38,11 +38,10 @@ INSERT OVERWRITE TABLE e2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -56,32 +55,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -132,7 +110,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 @@ -145,7 +123,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: explain @@ -164,11 +142,10 @@ INSERT OVERWRITE TABLE e1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 @@ -182,32 +159,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -258,7 +214,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 @@ -271,7 +227,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: FROM (select key, cast(key as double) as keyD, value from src order by key) a @@ -1743,12 +1699,12 @@ INSERT overwrite table e3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage - Stage-4 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: 
Stage-3 + Stage-2 depends on stages: Stage-6 Stage-7 depends on stages: Stage-2 STAGE PLANS: @@ -1763,51 +1719,23 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) + key expressions: _col0 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: string) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + value expressions: _col1 (type: double) + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: double), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1858,7 +1786,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-5 + Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 @@ -1871,9 +1799,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-6 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator 
Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col3:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToDouble(_col3) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e3 + Stage: Stage-2 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/ppd2.q.out b/ql/src/test/results/clientpositive/ppd2.q.out index 9111fc6..43119be 100644 --- a/ql/src/test/results/clientpositive/ppd2.q.out +++ b/ql/src/test/results/clientpositive/ppd2.q.out @@ -28,8 +28,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,32 +65,11 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -440,8 +418,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -473,27 +450,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: 
NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col1 > 1) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/ppd_join4.q.out b/ql/src/test/results/clientpositive/ppd_join4.q.out index 6ca7446..7557ced 100644 --- a/ql/src/test/results/clientpositive/ppd_join4.q.out +++ b/ql/src/test/results/clientpositive/ppd_join4.q.out @@ -46,8 +46,7 @@ where t2.name='c' and t3.id='a' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,26 +63,8 @@ STAGE PLANS: Reduce Output Operator key expressions: 'a' (type: string) sort order: + + Map-reduce partition columns: 'a' (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: 'a' (type: string) - sort order: + - Map-reduce partition columns: 'a' (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TableScan alias: t3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/auto_join0.q.out b/ql/src/test/results/clientpositive/spark/auto_join0.q.out index bc9c5db..f634fa9 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join0.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( @@ -30,10 +30,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 6 <- Map 5 (SORT, 1) + Reducer 5 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -49,7 +49,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 6 + Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -66,8 +66,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -102,28 +101,18 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Reducer 6 + 1 Reducer 5 Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27556 Data size: 612872 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -144,7 +133,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git a/ql/src/test/results/clientpositive/spark/auto_join15.q.out b/ql/src/test/results/clientpositive/spark/auto_join15.q.out index 6cb98a8..1784793 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join15.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -24,7 +24,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -42,8 +42,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -62,35 +61,25 @@ STAGE PLANS: 1 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git a/ql/src/test/results/clientpositive/spark/auto_join20.q.out b/ql/src/test/results/clientpositive/spark/auto_join20.q.out index aae0d15..38b5e9b 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -66,8 +66,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -96,29 +95,19 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - 
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 4 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial @@ -225,8 +214,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -255,29 +243,19 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reducer 4 Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 
(type: bigint) - Reducer 5 - Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) mode: mergepartial diff --git a/ql/src/test/results/clientpositive/spark/auto_join30.q.out b/ql/src/test/results/clientpositive/spark/auto_join30.q.out index 3f10154..63fbf74 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -22,11 +22,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -38,29 +36,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,38 +61,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: 
Group By Operator aggregations: sum(VALUE._col0) @@ -170,11 +146,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -183,29 +157,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -217,38 +179,28 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -312,8 +264,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -325,31 +275,20 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 
500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 2 Map Operator Tree: TableScan alias: src @@ -358,39 +297,28 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -460,12 +388,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -477,43 +402,37 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 7 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -528,41 +447,31 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 7 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -638,12 +547,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -652,43 +558,34 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 7 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -700,41 +597,31 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 7 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -810,12 +697,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 3 Map Operator Tree: TableScan alias: src @@ -824,43 +708,34 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 5 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 7 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -872,41 +747,31 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Left Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 1 Map 3 + 2 Map 
4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 5 - 2 Reducer 7 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -982,9 +847,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -996,11 +858,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src @@ -1009,90 +874,54 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 4 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 6 <- 
Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 6 + Map Join Operator + condition map: + Left Outer Join 0 to 1 + Right Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + 1 Map 2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - 1 Reducer 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1168,9 +997,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1182,11 +1008,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 2 Map Operator Tree: TableScan alias: src @@ -1195,90 +1024,54 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 2 + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Reducer 4 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) Stage: Stage-1 Spark Edges: - Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 6 + Map Join Operator + condition map: + Right Outer Join 0 to 1 + Right Outer Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + input vertices: + 0 Map 1 + 1 Map 2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 0 Reducer 2 - 1 Reducer 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 7 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_join31.q.out 
b/ql/src/test/results/clientpositive/spark/auto_join31.q.out
index 8d1237c..4dbedf9 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join31.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join31.q.out
@@ -28,9 +28,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -42,59 +39,37 @@ STAGE PLANS:
                     expressions: key (type: string)
                     outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Map 6 
+                    Spark HashTable Sink Operator
+                      keys:
+                        0 _col0 (type: string)
+                        1 _col0 (type: string)
+                        2 _col0 (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: string)
-                      sort order: +
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: string)
-        Reducer 2 
+                    Spark HashTable Sink Operator
+                      keys:
+                        0 _col0 (type: string)
+                        1 _col0 (type: string)
+                        2 _col0 (type: string)
             Local Work:
               Map Reduce Local Work
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Spark HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
-                    2 _col0 (type: string)
-        Reducer 7 
-            Local Work:
-              Map Reduce Local Work
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Spark HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
-                    2 _col0 (type: string)
 
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (GROUP, 1)
+        Reducer 3 <- Map 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -103,42 +78,31 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col1 (type: string)
-                      sort order: +
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: string)
-        Reducer 4 
+                    Map Join Operator
+                      condition map:
+                           Right Outer Join 0 to 1
+                           Inner Join 0 to 2
+                      keys:
+                        0 _col0 (type: string)
+                        1 _col0 (type: string)
+                        2 _col0 (type: string)
+                      outputColumnNames: _col2, _col3
+                      input vertices:
+                        0 Map 1
+                        2 Map 4
+                      Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: sum(hash(_col2,_col3))
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
             Local Work:
              Map Reduce Local Work
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Right Outer Join 0 to 1
-                       Inner Join 0 to 2
-                  keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
-                    2 _col0 (type: string)
-                  outputColumnNames: _col2, _col3
-                  input vertices:
-                    0 Reducer 2
-                    2 Reducer 7
-                  Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: sum(hash(_col2,_col3))
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint)
-        Reducer 5 
+        Reducer 3 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
diff --git a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out
index c5ccb2f..1ed388f 100644
--- a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out
+++ b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out
@@ -28,11 +28,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 31), Reducer 6 (PARTITION-LEVEL SORT, 31), Reducer 8 (PARTITION-LEVEL SORT, 31)
-        Reducer 4 <- Reducer 3 (GROUP, 31)
-        Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 31)
-        Reducer 8 <- Map 5 (PARTITION-LEVEL SORT, 31)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31), Map 4 (PARTITION-LEVEL SORT, 31), Map 5 (PARTITION-LEVEL SORT, 31)
+        Reducer 3 <- Reducer 2 (GROUP, 31)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -47,8 +44,9 @@ STAGE PLANS:
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -58,22 +56,26 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col1 (type: string)
+                      key expressions: _col0 (type: string)
                       sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
+                      value expressions: _col1 (type: string)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
             Reduce Operator Tree:
               Join Operator
                 condition map:
@@ -97,7 +99,7 @@ STAGE PLANS:
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
-        Reducer 4 
+        Reducer 3 
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -116,29 +118,6 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: string)
-        Reducer 8 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out b/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out
index 47aee98..551519f 100644
--- a/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out
+++ b/ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out
@@ -24,8 +24,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -38,28 +36,18 @@ STAGE PLANS:
                     Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: '105' (type: string)
-                        sort order: +
-                        Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 '105' (type: string)
+                          1 '105' (type: string)
             Local Work:
               Map Reduce Local Work
-            Reduce Operator Tree:
-              Select Operator
-                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
-                Spark HashTable Sink Operator
-                  keys:
-                    0 '105' (type: string)
-                    1 '105' (type: string)
 
   Stage: Stage-1
     Spark
-      Edges:
-        Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 3 
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -69,36 +57,28 @@ STAGE PLANS:
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: '105' (type: string)
-                        sort order: +
-                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-        Reducer 4 
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 '105' (type: string)
+                          1 '105' (type: string)
+                        input vertices:
+                          0 Map 1
+                        Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: '105' (type: string), 'val_105' (type: string)
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Local Work:
               Map Reduce Local Work
-            Reduce Operator Tree:
-              Select Operator
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 '105' (type: string)
-                    1 '105' (type: string)
-                  input vertices:
-                    0 Reducer 2
-                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: '105' (type: string), 'val_105' (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
index 81b882a..3d67b1d 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
@@ -39,8 +39,7 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -49,26 +48,15 @@ STAGE PLANS:
                   alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: key (type: string), UDFToDouble(key) (type: double)
-                    outputColumnNames: _col0, _col1
+                    expressions: UDFToDouble(key) (type: double)
+                    outputColumnNames: _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
+                      sort order: 
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: double)
         Reducer 2 
             Reduce Operator Tree:
-              Select Operator
-                expressions: VALUE._col0 (type: double)
-                outputColumnNames: _col1
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: double)
-        Reducer 3 
-            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
index 92d10f4..813704f 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
@@ -47,8 +47,7 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -61,24 +60,13 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
+                      key expressions: _col0 (type: string), _col2 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double), _col2 (type: string)
+                      value expressions: _col1 (type: double)
         Reducer 2 
             Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col2 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: double)
-        Reducer 3 
-            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
@@ -169,8 +157,7 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -183,24 +170,13 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
+                      key expressions: _col0 (type: string), _col2 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double), _col2 (type: string)
+                      value expressions: _col1 (type: double)
         Reducer 2 
             Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col2 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: double)
-        Reducer 3 
-            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
@@ -1747,11 +1723,11 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 5 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1761,42 +1737,32 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                      sort order: ++++
-                      Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                      key expressions: _col0 (type: string), _col2 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: double), _col2 (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col2 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string)
+                      value expressions: _col1 (type: double)
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: double)
-              Group By Operator
-                aggregations: count(DISTINCT _col1)
-                keys: _col0 (type: string), _col1 (type: double), _col2 (type: string)
-                mode: complete
-                outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: _col0 (type: string), UDFToDouble(_col3) (type: double)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e3
-        Reducer 3 
+                    expressions: key (type: string), UDFToDouble(key) (type: double), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col1)
+                      keys: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                        sort order: ++++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1836,6 +1802,26 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e2
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col3:0._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToDouble(_col3) (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.e3
 
   Stage: Stage-0
     Move Operator
diff --git a/ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out b/ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out
index 6866cf5..371e756 100644
--- a/ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_multi_insert_parallel_orderby.q.out
@@ -263,8 +263,6 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (SORT, 42)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -276,37 +274,26 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
+                    File Output Operator
+                      compressed: false
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
-        Reducer 2 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                      name: default.e1
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.e2
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.e1
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.e2
 
   Stage: Stage-0
     Move Operator
@@ -1394,8 +1381,7 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1407,40 +1393,29 @@ STAGE PLANS:
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
+                    Limit
+                      Number of rows: 10
+                      Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col0 (type: string), _col1 (type: string)
+                    Select Operator
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: string)
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: default.e2
         Reducer 2 
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Limit
-                  Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order: 
-                    Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-                    TopN Hash Memory Usage: 0.1
-                    value expressions: _col0 (type: string), _col1 (type: string)
-                  Select Operator
-                    expressions: _col0 (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.e2
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
                 expressions: VALUE._col0 (type: string), VALUE._col1 (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out b/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out
index 2c95e12..aa4ffc4 100644
--- a/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out
+++ b/ql/src/test/results/clientpositive/spark/tez_join_tests.q.out
@@ -12,10 +12,9 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 3 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -33,7 +32,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
-        Map 6 
+        Map 5 
            Map Operator Tree:
                TableScan
                  alias: a
@@ -47,7 +46,7 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-        Map 7 
+        Map 6 
            Map Operator Tree:
                TableScan
                  alias: c
@@ -73,23 +72,12 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
                   key expressions: _col1 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: string)
-        Reducer 4 
+        Reducer 3 
            Reduce Operator Tree:
              Join Operator
                condition map:
@@ -104,7 +92,7 @@ STAGE PLANS:
                     sort order: +
                     Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
-        Reducer 5 
+        Reducer 4 
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out b/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out
index 24c7b40..4178a72 100644
--- a/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out
+++ b/ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out
@@ -12,10 +12,9 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (SORT, 1)
-        Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 3 (SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -33,7 +32,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: string)
-        Map 6 
+        Map 5 
            Map Operator Tree:
                TableScan
                  alias: a
@@ -47,7 +46,7 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-        Map 7 
+        Map 6 
            Map Operator Tree:
                TableScan
                  alias: c
@@ -73,23 +72,12 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: string)
-        Reducer 3 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
                   key expressions: _col1 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col1 (type: string)
                   Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: string)
-        Reducer 4 
+        Reducer 3 
            Reduce Operator Tree:
              Join Operator
                condition map:
@@ -104,7 +92,7 @@ STAGE PLANS:
                     sort order: +
                     Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string)
-        Reducer 5 
+        Reducer 4 
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string)
diff --git a/ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out b/ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out
index cab0b83..4642c19 100644
--- a/ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out
+++ b/ql/src/test/results/clientpositive/spark/truncate_column_buckets.q.out
@@ -19,14 +19,14 @@ POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:
 PREHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test_tab
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_tab
 #### A masked pattern was here ####
@@ -43,14 +43,14 @@ POSTHOOK: Output: default@test_tab
 PREHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test_tab
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_tab
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/truncate_column_buckets.q.out b/ql/src/test/results/clientpositive/truncate_column_buckets.q.out
index cab0b83..4642c19 100644
--- a/ql/src/test/results/clientpositive/truncate_column_buckets.q.out
+++ b/ql/src/test/results/clientpositive/truncate_column_buckets.q.out
@@ -19,14 +19,14 @@ POSTHOOK: Lineage: test_tab.value SIMPLE [(src)src.FieldSchema(name:value, type:
 PREHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test_tab
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_tab
 #### A masked pattern was here ####
@@ -43,14 +43,14 @@ POSTHOOK: Output: default@test_tab
 PREHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test_tab
 #### A masked pattern was here ####
 POSTHOOK: query: SELECT cnt FROM (
 SELECT INPUT__FILE__NAME file_name, count(*) cnt FROM test_tab
 GROUP BY INPUT__FILE__NAME
-ORDER BY file_name DESC)a
+)a ORDER BY file_name DESC
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test_tab
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
index fd9908f..283762a 100644
--- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
+++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -238,8 +238,7 @@ PLAN VECTORIZATION:
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -251,10 +250,21 @@ STAGE PLANS:
             Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
               Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
-              Reduce Output Operator
-                key expressions: 1 (type: int)
-                sort order: +
-                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count(1)
+                Group By Vectorization:
+                    groupByMode: HASH
+                    vectorOutput: false
+                    native: false
+                    vectorProcessingMode: NONE
+                    projectedOutputColumns: null
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
       Map Vectorization:
           enabled: false
 #### A masked pattern was here ####
@@ -263,61 +273,6 @@ STAGE PLANS:
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
-        Select Operator
-          Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
-          Group By Operator
-            aggregations: count(1)
-            Group By Vectorization:
-                groupByMode: HASH
-                vectorOutput: false
-                native: false
-                vectorProcessingMode: NONE
-                projectedOutputColumns: null
-            mode: hash
-            outputColumnNames: _col0
-            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0]
-            Reduce Output Operator
-              sort order: 
-              Reduce Sink Vectorization:
-                  className: VectorReduceSinkOperator
-                  native: false
-                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-              value expressions: _col0 (type: bigint)
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
-          groupByVectorOutput: true
-          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 1
-              includeColumns: [0]
-              dataColumns: _col0:bigint
-              partitionColumnCount: 0
-      Reduce Vectorization:
-          enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-      Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
           Group By Vectorization: