From 3884be09936e874b74122830a45ae5cdf758e34b Mon Sep 17 00:00:00 2001
From: Ashutosh Chauhan
Date: Thu, 16 Mar 2017 00:23:47 -0700
Subject: [PATCH] HIVE-16230 : Enable CBO in presence of hints

---
 .../calcite/CalciteSemanticException.java | 2 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java | 14 +-
 .../clientnegative/bucket_mapjoin_mismatch1.q | 1 +
 .../bucket_mapjoin_wrong_table_metadata_1.q | 2 +-
 .../bucket_mapjoin_wrong_table_metadata_2.q | 1 +
 .../test/queries/clientnegative/invalid_mapjoin1.q | 1 +
 ql/src/test/queries/clientnegative/join2.q | 1 +
 ql/src/test/queries/clientnegative/join28.q | 2 +-
 ql/src/test/queries/clientnegative/join29.q | 2 +-
 ql/src/test/queries/clientnegative/join32.q | 1 +
 ql/src/test/queries/clientnegative/join35.q | 2 +-
 .../queries/clientnegative/smb_bucketmapjoin.q | 2 +-
 .../test/queries/clientnegative/smb_mapjoin_14.q | 2 +-
 .../clientnegative/sortmerge_mapjoin_mismatch_1.q | 3 +-
 ql/src/test/queries/clientnegative/union22.q | 2 +-
 .../clientpositive/auto_sortmerge_join_11.q | 2 +-
 .../queries/clientpositive/bucket_map_join_1.q | 2 +-
 .../queries/clientpositive/bucket_map_join_2.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_1.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_2.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_3.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_4.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_5.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_6.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_7.q | 2 +-
 .../test/queries/clientpositive/bucketcontext_8.q | 2 +-
 .../test/queries/clientpositive/bucketmapjoin10.q | 2 +-
 .../test/queries/clientpositive/bucketmapjoin11.q | 2 +-
 .../test/queries/clientpositive/bucketmapjoin12.q | 2 +-
 .../test/queries/clientpositive/bucketmapjoin13.q | 2 +-
 .../test/queries/clientpositive/bucketmapjoin5.q | 2 +-
 .../test/queries/clientpositive/bucketmapjoin7.q | 1 +
 .../test/queries/clientpositive/bucketmapjoin8.q | 2 +-
 .../test/queries/clientpositive/bucketmapjoin9.q | 1 +
 .../clientpositive/bucketmapjoin_negative.q | 1 +
 .../clientpositive/bucketmapjoin_negative2.q | 2 +-
 .../clientpositive/bucketmapjoin_negative3.q | 2 +-
 ql/src/test/queries/clientpositive/comments.q | 4 +-
 .../infer_bucket_sort_map_operators.q | 1 +
 ql/src/test/queries/clientpositive/join25.q | 2 +-
 ql/src/test/queries/clientpositive/join26.q | 1 +
 ql/src/test/queries/clientpositive/join27.q | 2 +-
 ql/src/test/queries/clientpositive/join30.q | 2 +-
 ql/src/test/queries/clientpositive/join36.q | 2 +-
 ql/src/test/queries/clientpositive/join37.q | 1 +
 ql/src/test/queries/clientpositive/join38.q | 1 +
 ql/src/test/queries/clientpositive/join39.q | 1 +
 ql/src/test/queries/clientpositive/join40.q | 1 +
 ql/src/test/queries/clientpositive/join_map_ppr.q | 1 +
 .../test/queries/clientpositive/join_on_varchar.q | 1 +
 ql/src/test/queries/clientpositive/join_reorder.q | 1 +
 ql/src/test/queries/clientpositive/join_reorder2.q | 1 +
 ql/src/test/queries/clientpositive/join_reorder3.q | 2 +-
 ql/src/test/queries/clientpositive/join_reorder4.q | 2 +-
 ql/src/test/queries/clientpositive/mapjoin1.q | 2 +
 .../test/queries/clientpositive/mapjoin_distinct.q | 1 +
 ql/src/test/queries/clientpositive/pcs.q | 3 +-
 ql/src/test/queries/clientpositive/skewjoin.q | 2 +-
 ql/src/test/queries/clientpositive/smb_mapjoin9.q | 3 +-
 ql/src/test/queries/clientpositive/smb_mapjoin_1.q | 1 +
 .../test/queries/clientpositive/smb_mapjoin_10.q | 2 +-
 .../test/queries/clientpositive/smb_mapjoin_11.q | 1 +
 .../test/queries/clientpositive/smb_mapjoin_12.q | 4 +-
 .../test/queries/clientpositive/smb_mapjoin_13.q | 2 +-
 .../test/queries/clientpositive/smb_mapjoin_16.q | 2 +-
 ql/src/test/queries/clientpositive/smb_mapjoin_2.q | 2 +-
 ql/src/test/queries/clientpositive/smb_mapjoin_3.q | 1 +
 ql/src/test/queries/clientpositive/smb_mapjoin_7.q | 2 +-
 .../clientpositive/sort_merge_join_desc_1.q | 2 +-
 .../clientpositive/sort_merge_join_desc_2.q | 1 +
 .../clientpositive/sort_merge_join_desc_3.q | 2 +-
 .../clientpositive/sort_merge_join_desc_4.q | 2 +-
 .../clientpositive/sort_merge_join_desc_5.q | 1 +
 .../clientpositive/sort_merge_join_desc_6.q | 2 +-
 .../clientpositive/sort_merge_join_desc_7.q | 2 +-
 .../clientpositive/sort_merge_join_desc_8.q | 2 +-
 .../clientpositive/vectorized_bucketmapjoin1.q | 2 +-
 ql/src/test/results/clientpositive/comments.q.out | 76 +-
 ql/src/test/results/clientpositive/join25.q.out | 114 +-
 ql/src/test/results/clientpositive/join26.q.out | 418 +++-----
 ql/src/test/results/clientpositive/join27.q.out | 112 +-
 ql/src/test/results/clientpositive/join30.q.out | 66 +-
 ql/src/test/results/clientpositive/join36.q.out | 114 +-
 ql/src/test/results/clientpositive/join37.q.out | 114 +-
 ql/src/test/results/clientpositive/join38.q.out | 86 +-
 ql/src/test/results/clientpositive/join39.q.out | 86 +-
 ql/src/test/results/clientpositive/join40.q.out | 117 +--
 .../results/clientpositive/join_on_varchar.q.out | 46 +-
 .../test/results/clientpositive/join_reorder.q.out | 106 +-
 .../results/clientpositive/join_reorder4.q.out | 327 +++---
 .../clientpositive/llap/bucketmapjoin1.q.out | 204 ++--
 .../clientpositive/llap/bucketmapjoin2.q.out | 144 +--
 .../clientpositive/llap/bucketmapjoin3.q.out | 96 +-
 .../clientpositive/llap/bucketmapjoin4.q.out | 96 +-
 .../clientpositive/llap/explainuser_1.q.out | 302 +++---
 .../results/clientpositive/llap/join_filters.q.out | 8 +-
 .../results/clientpositive/llap/join_nulls.q.out | 2 +-
 .../results/clientpositive/llap/skewjoin.q.out | 272 +++--
 .../clientpositive/llap/smb_mapjoin_14.q.out | 207 ++--
 .../clientpositive/llap/smb_mapjoin_15.q.out | 276 ++---
 .../clientpositive/llap/smb_mapjoin_17.q.out | 98 +-
 .../clientpositive/llap/smb_mapjoin_4.q.out | 889 +++++++++-------
 .../clientpositive/llap/smb_mapjoin_5.q.out | 889 +++++++++-------
 .../clientpositive/llap/smb_mapjoin_6.q.out | 274 ++---
 .../test/results/clientpositive/llap/stats11.q.out | 96 +-
 .../llap/table_access_keys_stats.q.out | 2 +-
 ql/src/test/results/clientpositive/mapjoin1.q.out | 340 +++---
 .../results/clientpositive/mapjoin_distinct.q.out | 256 +++--
 .../test/results/clientpositive/mapjoin_hook.q.out | 4 +-
 ql/src/test/results/clientpositive/pcs.q.out | 150 ++-
 ql/src/test/results/clientpositive/skewjoin.q.out | 198 ++--
 .../clientpositive/spark/bucketmapjoin1.q.out | 656 ++++++------
 .../clientpositive/spark/bucketmapjoin2.q.out | 612 +++++------
 .../clientpositive/spark/bucketmapjoin3.q.out | 368 +++----
 .../clientpositive/spark/bucketmapjoin4.q.out | 318 +++---
 .../test/results/clientpositive/spark/join25.q.out | 56 +-
 .../test/results/clientpositive/spark/join26.q.out | 192 ++--
 .../test/results/clientpositive/spark/join27.q.out | 54 +-
 .../test/results/clientpositive/spark/join30.q.out | 56 +-
 .../test/results/clientpositive/spark/join36.q.out | 64 +-
 .../test/results/clientpositive/spark/join37.q.out | 56 +-
 .../test/results/clientpositive/spark/join38.q.out | 84 +-
 .../test/results/clientpositive/spark/join39.q.out | 32 +-
 .../clientpositive/spark/join_reorder.q.out | 106 +-
 .../clientpositive/spark/join_reorder4.q.out | 339 +++---
 .../results/clientpositive/spark/mapjoin1.q.out | 284 ++---
 .../clientpositive/spark/mapjoin_distinct.q.out | 192 ++--
 .../results/clientpositive/spark/skewjoin.q.out | 198 ++--
 .../clientpositive/spark/smb_mapjoin_14.q.out | 822 ++++++++++-----
 .../clientpositive/spark/smb_mapjoin_15.q.out | 566 +++++++---
 .../clientpositive/spark/smb_mapjoin_17.q.out | 168 ++-
 .../clientpositive/spark/smb_mapjoin_4.q.out | 1093 +++++++++++++++-----
 .../clientpositive/spark/smb_mapjoin_5.q.out | 1093 +++++++++++++++-----
 .../clientpositive/spark/smb_mapjoin_6.q.out | 360 +++++--
 .../spark/table_access_keys_stats.q.out | 2 +-
 135 files changed, 8457 insertions(+), 6048 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
index 5b2c9c0..0c6996c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
@@ -31,7 +31,7 @@ public enum UnsupportedFeature {
     Distinct_without_an_aggreggation, Duplicates_in_RR,
     Filter_expression_with_non_boolean_return_type,
-    Having_clause_without_any_groupby, Hint, Invalid_column_reference, Invalid_decimal,
+    Having_clause_without_any_groupby, Invalid_column_reference, Invalid_decimal,
     Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions, Schema_less_table,
     Select_alias_in_having_clause, Select_transform, Subquery, Table_sample_clauses, UDTF,
     Union_type, Unique_join
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index fc7d510..e75e892 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -442,7 +442,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
           throw new SemanticException(e);
         } else if( e instanceof CalciteViewSemanticException) {
-          // non-cbo path retries to execute create view and 
+          // non-cbo path retries to execute create view and
           // we believe it will throw the same error message
           throw new SemanticException(e);
         }
@@ -1009,7 +1009,7 @@ public ASTNode simpleBreadthFirstSearch(ASTNode ast, int... tokens) {
       }
       return null;
     }
-    
+
     public ASTNode depthFirstSearch(ASTNode ast, int token) {
       searchQueue.clear();
       searchQueue.add(ast);
@@ -1431,7 +1431,7 @@ public RelOptMaterialization apply(RelOptMaterialization materialization) {
           if (viewScan instanceof DruidQuery) {
             final DruidQuery dq = (DruidQuery) viewScan;
             newViewScan = DruidQuery.create(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
-                (RelOptHiveTable) viewScan.getTable(), dq.getDruidTable(),
+                viewScan.getTable(), dq.getDruidTable(),
                 ImmutableList.of(dq.getTableScan()));
           } else {
             newViewScan = new HiveTableScan(optCluster, optCluster.traitSetOf(HiveRelNode.CONVENTION),
@@ -3520,13 +3520,7 @@ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
     int posn = 0;
     boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
     if (hintPresent) {
-      String hint = ctx.getTokenRewriteStream().toString(
-          selExprList.getChild(0).getTokenStartIndex(),
-          selExprList.getChild(0).getTokenStopIndex());
-      String msg = String.format("Hint specified for %s."
-          + " Currently we don't support hints in CBO, turn off cbo to use hints.", hint);
-      LOG.debug(msg);
-      throw new CalciteSemanticException(msg, UnsupportedFeature.Hint);
+      posn++;
     }

     // 4. Bailout if select involves Transform
diff --git a/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q b/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q
index ff982c6..7b207c6 100644
--- a/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q
+++ b/ql/src/test/queries/clientnegative/bucket_mapjoin_mismatch1.q
@@ -29,6 +29,7 @@ select a.key, a.value, b.value
 from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
 on a.key=b.key and a.ds="2008-04-08" and b.ds="2008-04-08";

+set hive.cbo.enable=false;
 set hive.optimize.bucketmapjoin = true;
 explain
diff --git a/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_1.q b/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_1.q
index fa91552..378a77f 100644
--- a/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_1.q
+++ b/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_1.q
@@ -17,6 +17,6 @@ load data local inpath '../../data/files/T2.txt' overwrite into table table2;

 set hive.optimize.bucketmapjoin = true;
 set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-
+set hive.cbo.enable=false;
 select /*+ mapjoin(b) */ count(*) from table1 a join table2 b
 on a.key=b.key and a.value=b.value;
diff --git a/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_2.q b/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_2.q
index b392c42..9a01fba 100644
--- a/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_2.q
+++ b/ql/src/test/queries/clientnegative/bucket_mapjoin_wrong_table_metadata_2.q
@@ -18,6 +18,7 @@ load data local inpath '../../data/files/T1.txt' overwrite into table table1 par
 load data local inpath '../../data/files/T1.txt' overwrite into table table2;
 load data local inpath '../../data/files/T2.txt' overwrite into table table2;

+set hive.cbo.enable=false;
 set hive.optimize.bucketmapjoin = true;
 set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
diff --git a/ql/src/test/queries/clientnegative/invalid_mapjoin1.q b/ql/src/test/queries/clientnegative/invalid_mapjoin1.q
index 56d9211..a6efd44 100644
--- a/ql/src/test/queries/clientnegative/invalid_mapjoin1.q
+++ b/ql/src/test/queries/clientnegative/invalid_mapjoin1.q
@@ -1 +1,2 @@
+set hive.cbo.enable=false;
 select /*+ MAPJOIN(a) ,MAPJOIN(b)*/ * from src a join src b on (a.key=b.key and a.value=b.value);
diff --git a/ql/src/test/queries/clientnegative/join2.q b/ql/src/test/queries/clientnegative/join2.q
index 98a5f1e..aad29c7 100644
--- a/ql/src/test/queries/clientnegative/join2.q
+++ b/ql/src/test/queries/clientnegative/join2.q
@@ -1,3 +1,4 @@
+set hive.cbo.enable=false;
 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value
 FROM src1 x LEFT OUTER JOIN src y ON (x.key = y.key);
diff --git a/ql/src/test/queries/clientnegative/join28.q b/ql/src/test/queries/clientnegative/join28.q
index 32ff105..a3fbb4e 100644
--- a/ql/src/test/queries/clientnegative/join28.q
+++ b/ql/src/test/queries/clientnegative/join28.q
@@ -1,5 +1,5 @@
 CREATE TABLE dest_j1(key STRING, value STRING) STORED AS TEXTFILE;
-
+set hive.cbo.enable=false;
 -- Mapjoin followed by mapjoin is not supported.
 -- The same query would work fine without the hint.
-- Note that there is a positive test with the same name in clientpositive diff --git a/ql/src/test/queries/clientnegative/join29.q b/ql/src/test/queries/clientnegative/join29.q index 53a1652..72aa1ad 100644 --- a/ql/src/test/queries/clientnegative/join29.q +++ b/ql/src/test/queries/clientnegative/join29.q @@ -1,5 +1,5 @@ CREATE TABLE dest_j1(key STRING, cnt1 INT, cnt2 INT); - +set hive.cbo.enable=false; -- Mapjoin followed by group by is not supported. -- The same query would work without the hint -- Note that there is a positive test with the same name in clientpositive diff --git a/ql/src/test/queries/clientnegative/join32.q b/ql/src/test/queries/clientnegative/join32.q index 54a4dcd..f0547ec 100644 --- a/ql/src/test/queries/clientnegative/join32.q +++ b/ql/src/test/queries/clientnegative/join32.q @@ -1,3 +1,4 @@ +set hive.cbo.enable=false; CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; -- Mapjoin followed by Mapjoin is not supported. diff --git a/ql/src/test/queries/clientnegative/join35.q b/ql/src/test/queries/clientnegative/join35.q index fc8f77c..bd9e8bb 100644 --- a/ql/src/test/queries/clientnegative/join35.q +++ b/ql/src/test/queries/clientnegative/join35.q @@ -1,5 +1,5 @@ CREATE TABLE dest_j1(key STRING, value STRING, val2 INT) STORED AS TEXTFILE; - +set hive.cbo.enable=false; -- Mapjoin followed by union is not supported. -- The same query would work without the hint -- Note that there is a positive test with the same name in clientpositive diff --git a/ql/src/test/queries/clientnegative/smb_bucketmapjoin.q b/ql/src/test/queries/clientnegative/smb_bucketmapjoin.q index c252d86..1cd7066 100644 --- a/ql/src/test/queries/clientnegative/smb_bucketmapjoin.q +++ b/ql/src/test/queries/clientnegative/smb_bucketmapjoin.q @@ -16,7 +16,7 @@ select * from src; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; - +set hive.cbo.enable=false; select /*+mapjoin(a)*/ * from smb_bucket4_1 a left outer join smb_bucket4_2 b on a.key = b.key; diff --git a/ql/src/test/queries/clientnegative/smb_mapjoin_14.q b/ql/src/test/queries/clientnegative/smb_mapjoin_14.q index 688989a..58a406c 100644 --- a/ql/src/test/queries/clientnegative/smb_mapjoin_14.q +++ b/ql/src/test/queries/clientnegative/smb_mapjoin_14.q @@ -13,7 +13,7 @@ select * from src where key < 10; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -- A join followed by mapjoin is not allowed, so this query should fail. 
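The pattern in the test changes above repeats throughout this patch: with HIVE-16230, a query hint no longer makes CalcitePlanner throw UnsupportedFeature.Hint and drop out of CBO; the QUERY_HINT token is simply skipped (posn++) and Calcite planning continues. The clientnegative tests therefore add set hive.cbo.enable=false; so they still reach the legacy checks that reject unsupported hint usage, while several clientpositive tests enable hive.auto.convert.join so the hinted queries still end up as map joins. A minimal sketch of the intended new behavior, in the same .q style (illustrative only, not taken from the patch):

-- Hinted query that previously caused a fallback out of CBO; with this change
-- it is planned by Calcite, and the map join comes from auto conversion.
set hive.cbo.enable=true;
set hive.auto.convert.join=true;
explain
select /*+ MAPJOIN(b) */ a.key, b.value
from src a join src b on a.key = b.key;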
diff --git a/ql/src/test/queries/clientnegative/sortmerge_mapjoin_mismatch_1.q b/ql/src/test/queries/clientnegative/sortmerge_mapjoin_mismatch_1.q index 8fbbd96..aac6983 100644 --- a/ql/src/test/queries/clientnegative/sortmerge_mapjoin_mismatch_1.q +++ b/ql/src/test/queries/clientnegative/sortmerge_mapjoin_mismatch_1.q @@ -11,7 +11,7 @@ insert overwrite table table_desc select key, value from src; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; -- If the user asked for sort merge join to be enforced (by setting -- hive.enforce.sortmergebucketmapjoin to true), an error should be thrown, since -- one of the tables is in ascending order and the other is in descending order, @@ -23,6 +23,5 @@ explain select /*+mapjoin(a)*/ * from table_asc a join table_desc b on a.key = b.key; set hive.enforce.sortmergebucketmapjoin=true; - explain select /*+mapjoin(a)*/ * from table_asc a join table_desc b on a.key = b.key; diff --git a/ql/src/test/queries/clientnegative/union22.q b/ql/src/test/queries/clientnegative/union22.q index c745d2a..0e43cd3 100644 --- a/ql/src/test/queries/clientnegative/union22.q +++ b/ql/src/test/queries/clientnegative/union22.q @@ -9,7 +9,7 @@ insert overwrite table dst_union22_delta partition (ds='1') select key, key, value, key, value, value from src; set hive.merge.mapfiles=false; - +set hive.cbo.enable=false; -- Union followed by Mapjoin is not supported. -- The same query would work without the hint -- Note that there is a positive test with the same name in clientpositive diff --git a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q index 2dbde47..11499f8 100644 --- a/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q +++ b/ql/src/test/queries/clientpositive/auto_sortmerge_join_11.q @@ -33,7 +33,7 @@ set hive.optimize.bucketmapjoin.sortedmerge=true; -- bucketized mapjoin is not done explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; - +set hive.cbo.enable=false; -- The join is converted to a bucketed mapjoin with a mapjoin hint explain extended select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ mapjoin(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_1.q b/ql/src/test/queries/clientpositive/bucket_map_join_1.q index a92ffa4..2e62e5e 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_1.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_1.q @@ -16,7 +16,7 @@ load data local inpath '../../data/files/SortCol2Col1.txt' overwrite into table set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; - +set hive.cbo.enable=false; -- The tables are bucketed in same columns in different order, -- but sorted in different column orders -- Neither bucketed map-join, nor sort-merge join should be performed diff --git a/ql/src/test/queries/clientpositive/bucket_map_join_2.q b/ql/src/test/queries/clientpositive/bucket_map_join_2.q index 4aa6f57..4c2fe82 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_2.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_2.q @@ -16,7 +16,7 @@ load data local inpath '../../data/files/SortCol2Col1.txt' overwrite 
into table set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; - +set hive.cbo.enable=false; -- The tables are bucketed in same columns in different order, -- but sorted in different column orders -- Neither bucketed map-join, nor sort-merge join should be performed diff --git a/ql/src/test/queries/clientpositive/bucketcontext_1.q b/ql/src/test/queries/clientpositive/bucketcontext_1.q index eb9c273..2ac2aa7 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_1.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_1.q @@ -16,7 +16,7 @@ load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bu load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketcontext_2.q b/ql/src/test/queries/clientpositive/bucketcontext_2.q index 394a7ff..b8955fe 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_2.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_2.q @@ -14,7 +14,7 @@ load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bu load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketcontext_3.q b/ql/src/test/queries/clientpositive/bucketcontext_3.q index 5bf235c..cf9d6e9 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_3.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_3.q @@ -14,7 +14,7 @@ load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bu load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketcontext_4.q b/ql/src/test/queries/clientpositive/bucketcontext_4.q index 95b67a4..c41f944 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_4.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_4.q @@ -16,7 +16,7 @@ load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bu CREATE TABLE bucket_big (key string, 
value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketcontext_5.q b/ql/src/test/queries/clientpositive/bucketcontext_5.q index 67821ce..689a4a5 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_5.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_5.q @@ -10,7 +10,7 @@ load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bu CREATE TABLE bucket_big (key string, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big; load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big; - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketcontext_6.q b/ql/src/test/queries/clientpositive/bucketcontext_6.q index 8d77243..2b5ca58 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_6.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_6.q @@ -14,7 +14,7 @@ load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bu load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketcontext_7.q b/ql/src/test/queries/clientpositive/bucketcontext_7.q index de6ef76..abbf07c 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_7.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_7.q @@ -19,7 +19,7 @@ load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bu load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketcontext_8.q b/ql/src/test/queries/clientpositive/bucketcontext_8.q index 0a60c1a..bff938c 100644 --- a/ql/src/test/queries/clientpositive/bucketcontext_8.q +++ b/ql/src/test/queries/clientpositive/bucketcontext_8.q 
@@ -19,7 +19,7 @@ load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bu load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; explain extended select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; select /*+ MAPJOIN(a) */ count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin10.q b/ql/src/test/queries/clientpositive/bucketmapjoin10.q index d9b7edf..c2f3155 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin10.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin10.q @@ -23,7 +23,7 @@ LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_m LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS; - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin=true; -- The table bucketing metadata matches but the partition metadata does not, bucket map join should not be used diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin11.q b/ql/src/test/queries/clientpositive/bucketmapjoin11.q index 3c630ef..f15389a 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin11.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin11.q @@ -24,7 +24,7 @@ ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_2 PARTITION (part='2'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin=true; -- The table and partition bucketing metadata doesn't match but the bucket numbers of all partitions is diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin12.q b/ql/src/test/queries/clientpositive/bucketmapjoin12.q index 4229c5c..dcf3991 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin12.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin12.q @@ -20,7 +20,7 @@ LOAD DATA LOCAL INPATH '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_m LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part_3 PARTITION (part='1'); ALTER TABLE srcbucket_mapjoin_part_3 CLUSTERED BY (key) INTO 2 BUCKETS; - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin=true; -- The partition bucketing metadata match but one table is not bucketed, bucket map join should still be used diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin13.q b/ql/src/test/queries/clientpositive/bucketmapjoin13.q index c8f01f7..456d155 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin13.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin13.q @@ -20,7 +20,7 @@ CLUSTERED BY (key) INTO 2 BUCKETS; -- part=1 partition for srcbucket_mapjoin_part_2 is bucketed by 'key' INSERT OVERWRITE TABLE srcbucket_mapjoin_part_2 PARTITION (part='1') SELECT * FROM src; - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin=true; -- part=1 partition for srcbucket_mapjoin_part_1 is 
bucketed by 'value' diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin5.q b/ql/src/test/queries/clientpositive/bucketmapjoin5.q index 385087e..59ad98c 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin5.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin5.q @@ -23,7 +23,7 @@ load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_m create table bucketmapjoin_hash_result_1 (key bigint , value1 bigint, value2 bigint); create table bucketmapjoin_hash_result_2 (key bigint , value1 bigint, value2 bigint); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin7.q b/ql/src/test/queries/clientpositive/bucketmapjoin7.q index 3830e9d..43cc19b 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin7.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin7.q @@ -1,4 +1,5 @@ set hive.strict.checks.bucketing=false; +set hive.cbo.enable=false; set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin8.q b/ql/src/test/queries/clientpositive/bucketmapjoin8.q index 99eeabd..ecfe597 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin8.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin8.q @@ -15,7 +15,7 @@ LOAD DATA LOCAL INPATH '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_m ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 3 BUCKETS; set hive.optimize.bucketmapjoin=true; - +set hive.cbo.enable=false; -- The partition bucketing metadata match but the tables have different numbers of buckets, bucket map join should still be used EXPLAIN EXTENDED diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin9.q b/ql/src/test/queries/clientpositive/bucketmapjoin9.q index 13189bc..b7d3f6b 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin9.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin9.q @@ -17,6 +17,7 @@ ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) INTO 2 BUCKETS; set hive.optimize.bucketmapjoin=true; +set hive.cbo.enable=false; -- The table bucketing metadata matches but the partitions have different numbers of buckets, bucket map join should not be used EXPLAIN EXTENDED diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin_negative.q b/ql/src/test/queries/clientpositive/bucketmapjoin_negative.q index 0d84fdd..9f68584 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin_negative.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin_negative.q @@ -15,6 +15,7 @@ load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_m +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q b/ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q index 93780dc..0dbd4aa 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin_negative2.q @@ -10,7 +10,7 @@ load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_m load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); load data local 
inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-09'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); diff --git a/ql/src/test/queries/clientpositive/bucketmapjoin_negative3.q b/ql/src/test/queries/clientpositive/bucketmapjoin_negative3.q index 51dc40b..b08d22e 100644 --- a/ql/src/test/queries/clientpositive/bucketmapjoin_negative3.q +++ b/ql/src/test/queries/clientpositive/bucketmapjoin_negative3.q @@ -25,7 +25,7 @@ load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test3; load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE test4; load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE test4; load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE test4; - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; -- should be allowed explain extended select /*+ MAPJOIN(R) */ * from test1 L join test1 R on L.key=R.key AND L.value=R.value; diff --git a/ql/src/test/queries/clientpositive/comments.q b/ql/src/test/queries/clientpositive/comments.q index cacb689..b03cd3f 100644 --- a/ql/src/test/queries/clientpositive/comments.q +++ b/ql/src/test/queries/clientpositive/comments.q @@ -15,7 +15,9 @@ select /**/ key /* */ from src limit 1; */ select /* */ key from src limit 1; - +set hive.auto.convert.join=true; select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0; explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0; + +reset hive.auto.convert.join; diff --git a/ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q b/ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q index 3d57083..1a778fa 100644 --- a/ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q +++ b/ql/src/test/queries/clientpositive/infer_bucket_sort_map_operators.q @@ -52,6 +52,7 @@ DESCRIBE FORMATTED test_table_out PARTITION (part = '1'); set hive.map.groupby.sorted=false; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; +set hive.cbo.enable=false; -- Test SMB join doesn't affect inference, should not be bucketed or sorted EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') diff --git a/ql/src/test/queries/clientpositive/join25.q b/ql/src/test/queries/clientpositive/join25.q index b093d69..b8d7c22 100644 --- a/ql/src/test/queries/clientpositive/join25.q +++ b/ql/src/test/queries/clientpositive/join25.q @@ -1,7 +1,7 @@ -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, value STRING, val2 STRING) STORED AS TEXTFILE; - +set hive.auto.convert.join=true; EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git a/ql/src/test/queries/clientpositive/join26.q b/ql/src/test/queries/clientpositive/join26.q index d90ce40..d34454d 100644 --- a/ql/src/test/queries/clientpositive/join26.q +++ b/ql/src/test/queries/clientpositive/join26.q @@ -2,6 +2,7 @@ CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; +set hive.auto.convert.join=true; EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value diff --git a/ql/src/test/queries/clientpositive/join27.q b/ql/src/test/queries/clientpositive/join27.q index a9000be..d53f553 100644 --- a/ql/src/test/queries/clientpositive/join27.q +++ 
b/ql/src/test/queries/clientpositive/join27.q @@ -1,7 +1,7 @@ -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, value STRING, val2 STRING) STORED AS TEXTFILE; - +set hive.auto.convert.join=true; EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git a/ql/src/test/queries/clientpositive/join30.q b/ql/src/test/queries/clientpositive/join30.q index 292a310..004581f 100644 --- a/ql/src/test/queries/clientpositive/join30.q +++ b/ql/src/test/queries/clientpositive/join30.q @@ -1,7 +1,7 @@ -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key INT, cnt INT); - +set hive.auto.convert.join=true; EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key; diff --git a/ql/src/test/queries/clientpositive/join36.q b/ql/src/test/queries/clientpositive/join36.q index ed71439..89fbb4c 100644 --- a/ql/src/test/queries/clientpositive/join36.q +++ b/ql/src/test/queries/clientpositive/join36.q @@ -9,7 +9,7 @@ SELECT key, count(1) from src group by key; INSERT OVERWRITE TABLE tmp2 SELECT key, count(1) from src group by key; - +set hive.auto.convert.join=true; EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt diff --git a/ql/src/test/queries/clientpositive/join37.q b/ql/src/test/queries/clientpositive/join37.q index e029415..92cae4d 100644 --- a/ql/src/test/queries/clientpositive/join37.q +++ b/ql/src/test/queries/clientpositive/join37.q @@ -2,6 +2,7 @@ CREATE TABLE dest_j1(key INT, value STRING, val2 STRING) STORED AS TEXTFILE; +set hive.auto.convert.join=true; EXPLAIN INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value diff --git a/ql/src/test/queries/clientpositive/join38.q b/ql/src/test/queries/clientpositive/join38.q index 7fbe377..ed7b9d8 100644 --- a/ql/src/test/queries/clientpositive/join38.q +++ b/ql/src/test/queries/clientpositive/join38.q @@ -5,6 +5,7 @@ create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,co insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100; select * from tmp; +set hive.auto.convert.join=true; explain FROM src a JOIN tmp b ON (a.key = b.col11) diff --git a/ql/src/test/queries/clientpositive/join39.q b/ql/src/test/queries/clientpositive/join39.q index ab12158..e14a114 100644 --- a/ql/src/test/queries/clientpositive/join39.q +++ b/ql/src/test/queries/clientpositive/join39.q @@ -1,6 +1,7 @@ -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE; +set hive.auto.convert.join=true; explain INSERT OVERWRITE TABLE dest_j1 diff --git a/ql/src/test/queries/clientpositive/join40.q b/ql/src/test/queries/clientpositive/join40.q index 1e6b2b3..ee34983 100644 --- a/ql/src/test/queries/clientpositive/join40.q +++ b/ql/src/test/queries/clientpositive/join40.q @@ -33,6 +33,7 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 A SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; +set hive.auto.convert.join=true; EXPLAIN SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key); diff --git a/ql/src/test/queries/clientpositive/join_map_ppr.q b/ql/src/test/queries/clientpositive/join_map_ppr.q index 23070b1..840d303 100644 --- a/ql/src/test/queries/clientpositive/join_map_ppr.q +++ 
b/ql/src/test/queries/clientpositive/join_map_ppr.q @@ -1,6 +1,7 @@ -- SORT_QUERY_RESULTS CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; +set hive.cbo.enable=false; EXPLAIN EXTENDED INSERT OVERWRITE TABLE dest_j1 diff --git a/ql/src/test/queries/clientpositive/join_on_varchar.q b/ql/src/test/queries/clientpositive/join_on_varchar.q index a1ce1ff..5016369 100644 --- a/ql/src/test/queries/clientpositive/join_on_varchar.q +++ b/ql/src/test/queries/clientpositive/join_on_varchar.q @@ -7,6 +7,7 @@ insert into table tbl1 select repeat('t', 10), 11 from src limit 1; insert into table tbl1 select repeat('s', 10), 22 from src limit 1; insert into table tbl2 select concat(repeat('t', 10), 'ppp') from src limit 1; insert into table tbl2 select repeat('s', 10) from src limit 1; +set hive.auto.convert.join=true; explain select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2; diff --git a/ql/src/test/queries/clientpositive/join_reorder.q b/ql/src/test/queries/clientpositive/join_reorder.q index 55aaaed..5161f1b 100644 --- a/ql/src/test/queries/clientpositive/join_reorder.q +++ b/ql/src/test/queries/clientpositive/join_reorder.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; +set hive.cbo.enable=false; diff --git a/ql/src/test/queries/clientpositive/join_reorder2.q b/ql/src/test/queries/clientpositive/join_reorder2.q index ca1e65e..93a0dba 100644 --- a/ql/src/test/queries/clientpositive/join_reorder2.q +++ b/ql/src/test/queries/clientpositive/join_reorder2.q @@ -1,4 +1,5 @@ +set hive.cbo.enable=false; diff --git a/ql/src/test/queries/clientpositive/join_reorder3.q b/ql/src/test/queries/clientpositive/join_reorder3.q index 994be16..7d5a95d 100644 --- a/ql/src/test/queries/clientpositive/join_reorder3.q +++ b/ql/src/test/queries/clientpositive/join_reorder3.q @@ -1,4 +1,4 @@ - +set hive.cbo.enable=false; diff --git a/ql/src/test/queries/clientpositive/join_reorder4.q b/ql/src/test/queries/clientpositive/join_reorder4.q index 16ef204..265980b 100644 --- a/ql/src/test/queries/clientpositive/join_reorder4.q +++ b/ql/src/test/queries/clientpositive/join_reorder4.q @@ -6,7 +6,7 @@ LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1; LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2; LOAD DATA LOCAL INPATH '../../data/files/T3.txt' INTO TABLE T3; -set hive.auto.convert.join=true; +set hive.cbo.enable=false; explain select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3; select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3; diff --git a/ql/src/test/queries/clientpositive/mapjoin1.q b/ql/src/test/queries/clientpositive/mapjoin1.q index 70bfe2e..568557d 100644 --- a/ql/src/test/queries/clientpositive/mapjoin1.q +++ b/ql/src/test/queries/clientpositive/mapjoin1.q @@ -1,3 +1,5 @@ + +set hive.auto.convert.join=true; SELECT /*+ MAPJOIN(b) */ sum(a.key) as sum_a FROM srcpart a JOIN src b ON a.key = b.key where a.ds is not null; diff --git a/ql/src/test/queries/clientpositive/mapjoin_distinct.q b/ql/src/test/queries/clientpositive/mapjoin_distinct.q index d021c63..3afc9c6 100644 --- a/ql/src/test/queries/clientpositive/mapjoin_distinct.q +++ b/ql/src/test/queries/clientpositive/mapjoin_distinct.q @@ -1,4 +1,5 @@ set hive.map.aggr = true; +set hive.auto.convert.join=true; set hive.groupby.skewindata = true; explain FROM srcpart c diff --git a/ql/src/test/queries/clientpositive/pcs.q b/ql/src/test/queries/clientpositive/pcs.q index 
0667e95..eb16647 100644 --- a/ql/src/test/queries/clientpositive/pcs.q +++ b/ql/src/test/queries/clientpositive/pcs.q @@ -26,6 +26,7 @@ select ds from pcs_t1 where struct(ds, key) in (struct('2000-04-08',1), struct(' explain extended select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)); select ds from pcs_t1 where struct(ds, key+2) in (struct('2000-04-08',3), struct('2000-04-09',4)); +set hive.cbo.enable=false; explain extended select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')); select /*+ MAPJOIN(pcs_t1) */ a.ds, b.key from pcs_t1 a join pcs_t1 b on a.ds=b.ds where struct(a.ds, a.key, b.ds) in (struct('2000-04-08',1, '2000-04-09'), struct('2000-04-09',2, '2000-04-08')); @@ -64,4 +65,4 @@ explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)); explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0); -select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0); \ No newline at end of file +select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0); diff --git a/ql/src/test/queries/clientpositive/skewjoin.q b/ql/src/test/queries/clientpositive/skewjoin.q index 52b8639..e4b178a 100644 --- a/ql/src/test/queries/clientpositive/skewjoin.q +++ b/ql/src/test/queries/clientpositive/skewjoin.q @@ -24,7 +24,7 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value; SELECT sum(hash(key)), sum(hash(value)) FROM dest_j1; - +set hive.cbo.enable=false; EXPLAIN SELECT /*+ STREAMTABLE(a) */ * FROM T1 a JOIN T2 b ON a.key = b.key diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin9.q b/ql/src/test/queries/clientpositive/smb_mapjoin9.q index c297447..e915951 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin9.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin9.q @@ -2,6 +2,7 @@ create table hive_test_smb_bucket1 (key int, value string) partitioned by (ds st create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets; set hive.optimize.bucketmapjoin = true; +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; @@ -54,4 +55,4 @@ ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS N drop table smb_mapjoin9_results; drop table hive_test_smb_bucket1; -drop table hive_test_smb_bucket2; \ No newline at end of file +drop table hive_test_smb_bucket2; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_1.q b/ql/src/test/queries/clientpositive/smb_mapjoin_1.q index 6051e4e1..baf1690 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_1.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_1.q @@ -12,6 +12,7 @@ load data local inpath '../../data/files/smbbucket_1.rc' overwrite into table sm load data local inpath '../../data/files/smbbucket_2.rc' overwrite into table smb_bucket_2; load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table smb_bucket_3; +set 
hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_10.q b/ql/src/test/queries/clientpositive/smb_mapjoin_10.q index cd332c8..ab8258f 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_10.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_10.q @@ -13,7 +13,7 @@ load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_buck load data local inpath '../../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); load data local inpath '../../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); - +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q index ccb97b3..66bdff1 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_11.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_11.q @@ -3,6 +3,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; +set hive.cbo.enable=false; set hive.exec.reducers.max = 1; set hive.merge.mapfiles=false; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q index 7a506ad..c209308 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_12.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_12.q @@ -6,7 +6,7 @@ set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; set hive.exec.reducers.max = 1; set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; - +set hive.cbo.enable=false; -- This test verifies that the output of a sort merge join on 1 big partition with multiple small partitions is bucketed and sorted -- Create two bucketed and sorted tables @@ -46,4 +46,4 @@ SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN t INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1'; -SELECT count(*) from test_table3 tablesample (bucket 2 out of 16) a where ds = '2'; \ No newline at end of file +SELECT count(*) from test_table3 tablesample (bucket 2 out of 16) a where ds = '2'; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_13.q b/ql/src/test/queries/clientpositive/smb_mapjoin_13.q index 744db88..6cba8ea 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_13.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_13.q @@ -1,7 +1,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; set hive.exec.reducers.max = 1; set hive.merge.mapfiles=false; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_16.q b/ql/src/test/queries/clientpositive/smb_mapjoin_16.q index bff11dd..4bf6379 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_16.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_16.q @@ -1,7 +1,7 @@ set hive.optimize.bucketmapjoin = true; set 
hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; set hive.exec.reducers.max = 1; set hive.merge.mapfiles=false; diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_2.q b/ql/src/test/queries/clientpositive/smb_mapjoin_2.q index 672cd54..1b184d8 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_2.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_2.q @@ -15,7 +15,7 @@ load data local inpath '../../data/files/smbbucket_3.rc' overwrite into table sm set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; -- SORT_QUERY_RESULTS explain diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_3.q b/ql/src/test/queries/clientpositive/smb_mapjoin_3.q index 48f6ed2..ecd38cc 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_3.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_3.q @@ -1,3 +1,4 @@ +set hive.cbo.enable=false; set hive.strict.checks.bucketing=false; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_7.q b/ql/src/test/queries/clientpositive/smb_mapjoin_7.q index f0b41a4..4a6afb0 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_7.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_7.q @@ -25,7 +25,7 @@ select * from src; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; insert overwrite table smb_join_results_empty_bigtable select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key; diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_1.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_1.q index efa0178..c02f500 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_1.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_1.q @@ -12,7 +12,7 @@ insert overwrite table table_desc2 select key, value from src; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; -- The columns of the tables above are sorted in same descending order. 
-- So, sort merge join should be performed diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_2.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_2.q index 2b787b8..6044c51 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_2.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_2.q @@ -11,6 +11,7 @@ sorted by (key DESC, value DESC) into 1 BUCKETS; insert overwrite table table_desc1 select key, value from src; insert overwrite table table_desc2 select key, value from src; +set hive.cbo.enable=false; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_3.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_3.q index bdc550c..2a5e6e2 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_3.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_3.q @@ -14,7 +14,7 @@ insert overwrite table table_desc2 select key, value from src; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; -- The columns of the tables above are sorted in same orders. -- descending followed by ascending -- So, sort merge join should be performed diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_4.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_4.q index 89a26fd..3b23304 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_4.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_4.q @@ -14,7 +14,7 @@ insert overwrite table table_desc2 select key, value from src; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; -- The columns of the tables above are sorted in different orders. 
-- So, sort merge join should not be performed diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q index 9f32f53..5be80b4 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_5.q @@ -1,4 +1,5 @@ +set hive.cbo.enable=false; CREATE TABLE srcbucket_mapjoin_part_1 (key INT, value STRING) PARTITIONED BY (part STRING) diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q index e733538..0c0422f 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_6.q @@ -14,7 +14,7 @@ ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key) SORTED BY (key DESC) INT set hive.optimize.bucketmapjoin=true; set hive.optimize.bucketmapjoin.sortedmerge = true; - +set hive.cbo.enable=false; -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used EXPLAIN EXTENDED diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q index fe523be..3be7c2d 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_7.q @@ -20,7 +20,7 @@ ALTER TABLE srcbucket_mapjoin_part_2 CLUSTERED BY (key, value) SORTED BY (value set hive.optimize.bucketmapjoin=true; set hive.optimize.bucketmapjoin.sortedmerge = true; - +set hive.cbo.enable=false; -- The table sorting metadata matches but the partition metadata does not, sorted merge join should not be used EXPLAIN EXTENDED diff --git a/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q b/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q index 4c0975d..89d2c9d 100644 --- a/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q +++ b/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q @@ -22,7 +22,7 @@ insert overwrite table table_desc4 select key, concat(value,"_2") as value2 from set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; -- columns are sorted by one key in first table, two keys in second table but in same sort order for key. 
Hence SMB join should pass explain diff --git a/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q b/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q index 191d8c6..eb42ef2 100644 --- a/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q +++ b/ql/src/test/queries/clientpositive/vectorized_bucketmapjoin1.q @@ -29,7 +29,7 @@ set hive.vectorized.execution.enabled=true; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; - +set hive.cbo.enable=false; explain vectorization expression select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key; select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key; diff --git a/ql/src/test/results/clientpositive/comments.q.out b/ql/src/test/results/clientpositive/comments.q.out index 4395cb2..53a766f 100644 --- a/ql/src/test/results/clientpositive/comments.q.out +++ b/ql/src/test/results/clientpositive/comments.q.out @@ -76,34 +76,38 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key > 0) (type: boolean) + predicate: (UDFToDouble(key) > 0.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -112,28 +116,32 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key > 0) (type: boolean) + predicate: (UDFToDouble(key) > 0.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -186,7 +194,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [b] + /src [$hdt$_1:b] Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/join25.q.out b/ql/src/test/results/clientpositive/join25.q.out index 5f3f01c..5ad95c5 100644 --- a/ql/src/test/results/clientpositive/join25.q.out +++ b/ql/src/test/results/clientpositive/join25.q.out @@ -17,37 +17,36 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - x + $hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - x + $hdt$_0:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -56,38 +55,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col3 (type: string) + 
outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -98,39 +92,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index 8004208..5bc8245 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -19,59 +19,62 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.key = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-1 depends on stages: Stage-10 - Stage-9 depends on stages: Stage-1 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 - Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - x + $hdt$_1:y Fetch Operator limit: -1 - y + $hdt$_2:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - x + $hdt$_1:y TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key 
(type: string) - Position of Big Table: 2 - y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 2 + $hdt$_2:x TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 2 - Stage: Stage-1 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -82,53 +85,57 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col6, _col11 - Position of Big Table: 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + Position of Big Table: 2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -136,12 +143,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 @@ -150,13 +154,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -166,215 +168,128 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - - Stage: Stage-9 - Conditional Operator - - Stage: Stage-6 - Move Operator - files: - hdfs directory: true + name: default.src + name: default.src #### A masked pattern was here #### - - Stage: Stage-0 - Move Operator - tables: - replace: true + 
Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - table: + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.src1 + name: default.src1 #### A masked pattern was here #### Partition - base file name: -ext-10002 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types 
string:string:string + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} bucket_count -1 column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - name: default.dest_j1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: -#### A masked pattern was here #### + /srcpart/ds=2008-04-08/hr=11 [$hdt$_0:z] - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10002 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + Stage: Stage-0 + Move Operator + tables: + replace: true #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - + table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -396,14 +311,9 @@ STAGE PLANS: #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - name: default.dest_j1 - Truncated Path -> Alias: -#### A masked pattern was here #### - Stage: Stage-8 - Move Operator - files: - hdfs directory: true + Stage: Stage-2 + Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 diff --git a/ql/src/test/results/clientpositive/join27.q.out b/ql/src/test/results/clientpositive/join27.q.out index fce7d95..8b43f3f 100644 --- a/ql/src/test/results/clientpositive/join27.q.out +++ b/ql/src/test/results/clientpositive/join27.q.out @@ -17,37 +17,36 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - x + $hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - x + $hdt$_0:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 value (type: string) - 1 value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -56,38 +55,33 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: 
string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -98,39 +92,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) diff --git a/ql/src/test/results/clientpositive/join30.q.out b/ql/src/test/results/clientpositive/join30.q.out index 9d29cef..3bd6db1 100644 --- a/ql/src/test/results/clientpositive/join30.q.out +++ b/ql/src/test/results/clientpositive/join30.q.out @@ -15,32 +15,36 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-1 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - x + $hdt$_0:$hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - x + $hdt$_0:$hdt$_0:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ 
-49,26 +53,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -116,7 +124,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ] POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join36.q.out b/ql/src/test/results/clientpositive/join36.q.out index 206d5ff..43a091f 100644 --- a/ql/src/test/results/clientpositive/join36.q.out +++ b/ql/src/test/results/clientpositive/join36.q.out @@ -57,37 +57,36 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - x + $hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - x + $hdt$_0:x TableScan alias: x Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) + Select Operator + expressions: key (type: int), cnt (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + HashTable Sink 
Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -96,38 +95,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: int), cnt (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -138,39 +132,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join37.q.out b/ql/src/test/results/clientpositive/join37.q.out index 5ee88f7..b0a2ee3 100644 --- a/ql/src/test/results/clientpositive/join37.q.out +++ b/ql/src/test/results/clientpositive/join37.q.out @@ -17,37 +17,36 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN 
src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - x + $hdt$_0:x Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - x + $hdt$_0:x TableScan alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -56,38 +55,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -98,39 +92,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-3 
+ Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join38.q.out b/ql/src/test/results/clientpositive/join38.q.out index aca06b2..60c54ce 100644 --- a/ql/src/test/results/clientpositive/join38.q.out +++ b/ql/src/test/results/clientpositive/join38.q.out @@ -49,59 +49,71 @@ where b.col11 = 111 group by a.value, b.col5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:$hdt$_1:b Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:$hdt$_1:b TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 126 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 111) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 col11 (type: string) + predicate: (UDFToDouble(col11) = 111.0) (type: boolean) + Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col5 (type: string), col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 126 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (col11 = 111) (type: boolean) - Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 col11 (type: string) - outputColumnNames: _col1, _col10 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col1 (type: string), _col10 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + predicate: (UDFToDouble(key) = 111.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/join39.q.out b/ql/src/test/results/clientpositive/join39.q.out index ee99675..628a44a 100644 --- a/ql/src/test/results/clientpositive/join39.q.out +++ b/ql/src/test/results/clientpositive/join39.q.out @@ -17,30 +17,25 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-1 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - y:src + $hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - y:src + $hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <= 100) (type: boolean) + predicate: (UDFToDouble(key) <= 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -48,25 +43,25 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), 
_col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -80,15 +75,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-8 - Conditional Operator - - Stage: Stage-5 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-0 Move Operator tables: @@ -99,39 +85,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-7 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join40.q.out b/ql/src/test/results/clientpositive/join40.q.out index 949c8fb..c9ace8d 100644 --- a/ql/src/test/results/clientpositive/join40.q.out +++ b/ql/src/test/results/clientpositive/join40.q.out @@ -3081,24 +3081,24 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - y:src + $hdt$_1:src Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - y:src + $hdt$_1:src TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <= 100) (type: boolean) + predicate: (UDFToDouble(key) <= 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -3106,25 +3106,25 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 key 
(type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3726,14 +3726,19 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(1) FROM SRC A JOIN SRC B ON (A.KEY=B.KEY) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -3744,11 +3749,14 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -3759,39 +3767,24 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + 
mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/join_on_varchar.q.out b/ql/src/test/results/clientpositive/join_on_varchar.q.out index ace445d..ac2bddd 100644 --- a/ql/src/test/results/clientpositive/join_on_varchar.q.out +++ b/ql/src/test/results/clientpositive/join_on_varchar.q.out @@ -59,31 +59,35 @@ POSTHOOK: query: explain select /*+ MAPJOIN(tbl2) */ c1,c2 from tbl1 join tbl2 on (c1 = c2) order by c1,c2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - tbl2 + $hdt$_1:tbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - tbl2 + $hdt$_1:tbl2 TableScan alias: tbl2 Statistics: Num rows: 2 Data size: 23 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c2 is not null (type: boolean) Statistics: Num rows: 2 Data size: 23 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 c1 (type: varchar(30)) - 1 c2 (type: varchar(30)) + Select Operator + expressions: c2 (type: varchar(30)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 23 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: varchar(30)) + 1 _col0 (type: varchar(30)) - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -92,16 +96,16 @@ STAGE PLANS: Filter Operator predicate: c1 is not null (type: boolean) Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 c1 (type: varchar(30)) - 1 c2 (type: varchar(30)) - outputColumnNames: _col0, _col5 - Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: varchar(10)), _col5 (type: varchar(30)) + Select Operator + expressions: c1 (type: varchar(10)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: varchar(30)) + 1 _col0 (type: varchar(30)) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/join_reorder.q.out b/ql/src/test/results/clientpositive/join_reorder.q.out index 5cef0c9..b103f97 100644 --- a/ql/src/test/results/clientpositive/join_reorder.q.out +++ b/ql/src/test/results/clientpositive/join_reorder.q.out @@ -64,50 +64,46 @@ STAGE PLANS: alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key 
expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + value expressions: key (type: string), val (type: string) TableScan alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (UDFToDouble(_col0) + 1.0) (type: double) - sort order: + - Map-reduce partition columns: (UDFToDouble(_col0) + 1.0) (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 UDFToDouble(_col0) (type: double) - 1 (UDFToDouble(_col0) + 1.0) (type: double) - outputColumnNames: _col0, _col1, _col2 + 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + outputColumnNames: _col0, _col1, _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -234,36 +230,28 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: val (type: string) TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic 
stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -281,30 +269,26 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string) + value expressions: _col0 (type: string), _col5 (type: string) TableScan alias: c Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: val (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Right Outer Join0 to 1 keys: 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col11 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join_reorder4.q.out b/ql/src/test/results/clientpositive/join_reorder4.q.out index c41407f..bbc1d86 100644 --- a/ql/src/test/results/clientpositive/join_reorder4.q.out +++ b/ql/src/test/results/clientpositive/join_reorder4.q.out @@ -51,78 +51,71 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null 
(type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - c - TableScan - alias: c - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key3 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val1 (type: string) TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val2 (type: string) + TableScan + alias: c + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key3 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: val3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 
66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -148,78 +141,71 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - c - TableScan - alias: c - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key3 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val1 (type: string) TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val2 (type: string) + TableScan + alias: c + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key3 is not null (type: boolean) + 
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: val3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -245,78 +231,71 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-5 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - c - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - c - TableScan - alias: c - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key3 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val1 (type: string) TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), 
_col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val2 (type: string) + TableScan + alias: c + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key3 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: val3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out index 93efa3c..66fb142 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out @@ -54,36 +54,44 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto 
parallelism: true Execution mode: llap LLAP IO: unknown Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + predicate: ((ds = '2008-04-08') and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: unknown Reducer 2 @@ -94,13 +102,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -178,36 +186,44 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: unknown Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and (ds = '2008-04-08')) (type: boolean) - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + 
predicate: ((ds = '2008-04-08') and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: unknown Reducer 2 @@ -218,13 +234,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -396,15 +412,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -468,15 +488,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -540,13 +564,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: 
_col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -754,15 +778,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -826,15 +854,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -898,13 +930,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index cea632b..d71a575 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -124,15 +124,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - 
null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -198,15 +202,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -270,13 +278,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -488,15 +496,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -562,15 +574,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: 
+ - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -634,13 +650,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -869,15 +885,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -943,15 +963,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 6588 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 58 Data size: 6588 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 58 Data size: 6588 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -1064,13 +1088,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 63 Data size: 7246 
Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 63 Data size: 7246 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index 4aa0e0e..1bc8ff4 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -148,15 +148,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -222,15 +226,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -294,13 +302,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 Position of Big Table: 1 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -512,15 +520,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + 
Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3294 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -586,15 +598,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -658,13 +674,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 Position of Big Table: 1 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out index 21ce1f8..c50d3e6 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out @@ -148,15 +148,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -220,15 +224,19 @@ STAGE PLANS: isSamplingPred: false 
predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -290,13 +298,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -496,15 +504,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -568,15 +580,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -638,13 +654,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: 
int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 0 Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 53839a3..e806ef3 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -5471,7 +5471,7 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c ON b.key = c.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing feature [Hint]. +Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) @@ -5481,33 +5481,39 @@ Stage-0 limit:-1 Stage-1 Map 1 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=3 width=33) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Map Join Operator [MAPJOIN_31] (rows=3 width=33) - Conds:FIL_27.key=RS_7.key(Inner),RS_7.key=RS_9.key(Inner),RS_9.key=RS_11.key(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11","_col15","_col16"] - <-Map 2 [BROADCAST_EDGE] llap - BROADCAST [RS_7] - PartitionCols:key - Filter Operator [FIL_28] (rows=1 width=30) + File Output Operator [FS_18] + Map Join Operator [MAPJOIN_35] (rows=3 width=33) + Conds:SEL_2._col0=RS_13._col0(Inner),RS_13._col0=RS_14._col0(Inner),RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 2 [BROADCAST_EDGE] llap + BROADCAST [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=1 width=30) + Output:["_col0","_col1"] + Filter Operator [FIL_32] (rows=1 width=30) predicate:key is not null - TableScan [TS_1] (rows=1 width=30) + TableScan [TS_3] (rows=1 width=30) default@t2,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map 3 [BROADCAST_EDGE] llap - BROADCAST [RS_9] - PartitionCols:key - Filter Operator [FIL_29] (rows=1 width=20) + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_14] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1 width=20) + Output:["_col0","_col1"] + Filter Operator [FIL_33] (rows=1 width=20) predicate:key is not null - TableScan [TS_2] (rows=1 width=20) + TableScan [TS_6] (rows=1 width=20) default@t3,c,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_11] - PartitionCols:key - Filter Operator [FIL_30] (rows=1 width=30) + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=1 width=30) + Output:["_col0","_col1"] + Filter Operator [FIL_34] (rows=1 width=30) predicate:key is not null - TableScan [TS_3] (rows=1 width=30) + TableScan [TS_9] (rows=1 width=30) default@t4,d,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Filter Operator [FIL_27] (rows=1 width=30) + <-Select Operator [SEL_2] (rows=1 width=30) + Output:["_col0","_col1"] + Filter Operator [FIL_31] (rows=1 width=30) predicate:key is not null TableScan [TS_0] (rows=1 width=30) default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] @@ 
-5524,7 +5530,7 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c ON b.key = c.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing feature [Hint]. +Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) @@ -5534,33 +5540,39 @@ Stage-0 limit:-1 Stage-1 Map 1 llap - File Output Operator [FS_14] - Select Operator [SEL_13] (rows=3 width=33) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Map Join Operator [MAPJOIN_31] (rows=3 width=33) - Conds:FIL_27.key=RS_7.key(Inner),RS_7.key=RS_9.key(Inner),RS_9.key=RS_11.key(Inner),Output:["_col0","_col1","_col5","_col6","_col10","_col11","_col15","_col16"] - <-Map 2 [BROADCAST_EDGE] llap - BROADCAST [RS_7] - PartitionCols:key - Filter Operator [FIL_28] (rows=1 width=30) + File Output Operator [FS_18] + Map Join Operator [MAPJOIN_35] (rows=3 width=33) + Conds:SEL_2._col0=RS_13._col0(Inner),RS_13._col0=RS_14._col0(Inner),RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 2 [BROADCAST_EDGE] llap + BROADCAST [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=1 width=30) + Output:["_col0","_col1"] + Filter Operator [FIL_32] (rows=1 width=30) predicate:key is not null - TableScan [TS_1] (rows=1 width=30) + TableScan [TS_3] (rows=1 width=30) default@t2,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map 3 [BROADCAST_EDGE] llap - BROADCAST [RS_9] - PartitionCols:key - Filter Operator [FIL_29] (rows=1 width=20) + <-Map 3 [BROADCAST_EDGE] llap + BROADCAST [RS_14] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1 width=20) + Output:["_col0","_col1"] + Filter Operator [FIL_33] (rows=1 width=20) predicate:key is not null - TableScan [TS_2] (rows=1 width=20) + TableScan [TS_6] (rows=1 width=20) default@t3,c,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_11] - PartitionCols:key - Filter Operator [FIL_30] (rows=1 width=30) + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=1 width=30) + Output:["_col0","_col1"] + Filter Operator [FIL_34] (rows=1 width=30) predicate:key is not null - TableScan [TS_3] (rows=1 width=30) + TableScan [TS_9] (rows=1 width=30) default@t4,d,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Filter Operator [FIL_27] (rows=1 width=30) + <-Select Operator [SEL_2] (rows=1 width=30) + Output:["_col0","_col1"] + Filter Operator [FIL_31] (rows=1 width=30) predicate:key is not null TableScan [TS_0] (rows=1 width=30) default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] @@ -5569,7 +5581,7 @@ PREHOOK: query: explain FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMT PREHOOK: type: QUERY POSTHOOK: query: explain FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing feature [Hint]. +Plan optimized by CBO. 
Vertex dependency in root stage Map 2 <- Map 1 (BROADCAST_EDGE) @@ -5581,29 +5593,35 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_13] - Group By Operator [GBY_11] (rows=1 width=24) + File Output Operator [FS_16] + Group By Operator [GBY_14] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_10] - Group By Operator [GBY_9] (rows=1 width=24) + PARTITION_ONLY_SHUFFLE [RS_13] + Group By Operator [GBY_12] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_8] + PARTITION_ONLY_SHUFFLE [RS_11] PartitionCols:rand() - Map Join Operator [MAPJOIN_18] (rows=550 width=87) - Conds:RS_3.UDFToDouble(key)=FIL_17.(key + 1)(Inner),Output:["_col0","_col1","_col5"] - <-Map 1 [BROADCAST_EDGE] llap - BROADCAST [RS_3] - PartitionCols:UDFToDouble(key) - Filter Operator [FIL_16] (rows=1 width=30) - predicate:UDFToDouble(key) is not null - TableScan [TS_0] (rows=1 width=30) - default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Filter Operator [FIL_17] (rows=500 width=87) - predicate:(key + 1) is not null - TableScan [TS_1] (rows=500 width=87) - default@src,c,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Select Operator [SEL_9] (rows=550 width=87) + Output:["_col0","_col1","_col2"] + Map Join Operator [MAPJOIN_21] (rows=550 width=87) + Conds:RS_6.UDFToDouble(_col0)=SEL_5.(UDFToDouble(_col0) + 1.0)(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [BROADCAST_EDGE] llap + BROADCAST [RS_6] + PartitionCols:UDFToDouble(_col0) + Select Operator [SEL_2] (rows=1 width=30) + Output:["_col0","_col1"] + Filter Operator [FIL_19] (rows=1 width=30) + predicate:key is not null + TableScan [TS_0] (rows=1 width=30) + default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Select Operator [SEL_5] (rows=500 width=87) + Output:["_col0"] + Filter Operator [FIL_20] (rows=500 width=87) + predicate:key is not null + TableScan [TS_3] (rows=500 width=87) + default@src,c,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) PREHOOK: type: QUERY @@ -5666,7 +5684,7 @@ PREHOOK: query: explain select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.val POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing feature [Hint]. +Plan optimized by CBO. 
Vertex dependency in root stage Map 1 <- Map 4 (BROADCAST_EDGE) @@ -5678,29 +5696,35 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_13] - Group By Operator [GBY_11] (rows=1 width=16) + File Output Operator [FS_16] + Group By Operator [GBY_14] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_10] - Group By Operator [GBY_9] (rows=1 width=16) + PARTITION_ONLY_SHUFFLE [RS_13] + Group By Operator [GBY_12] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_8] + PARTITION_ONLY_SHUFFLE [RS_11] PartitionCols:rand() - Map Join Operator [MAPJOIN_18] (rows=1 width=33) - Conds:FIL_16.key=RS_5.val(Inner),Output:["_col0","_col6"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_5] - PartitionCols:val - Filter Operator [FIL_17] (rows=1 width=30) - predicate:val is not null - TableScan [TS_1] (rows=1 width=30) - default@t1,v,Tbl:COMPLETE,Col:NONE,Output:["val"] - <-Filter Operator [FIL_16] (rows=1 width=30) - predicate:key is not null - TableScan [TS_0] (rows=1 width=30) - default@t1,k,Tbl:COMPLETE,Col:NONE,Output:["key"] + Select Operator [SEL_9] (rows=1 width=33) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_21] (rows=1 width=33) + Conds:SEL_2._col0=RS_7._col0(Inner),Output:["_col0","_col1"] + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_7] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=1 width=30) + Output:["_col0"] + Filter Operator [FIL_20] (rows=1 width=30) + predicate:val is not null + TableScan [TS_3] (rows=1 width=30) + default@t1,v,Tbl:COMPLETE,Col:NONE,Output:["val"] + <-Select Operator [SEL_2] (rows=1 width=30) + Output:["_col0"] + Filter Operator [FIL_19] (rows=1 width=30) + predicate:key is not null + TableScan [TS_0] (rows=1 width=30) + default@t1,k,Tbl:COMPLETE,Col:NONE,Output:["key"] PREHOOK: query: explain select sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key PREHOOK: type: QUERY @@ -5838,7 +5862,7 @@ PREHOOK: query: explain FROM T1 a RIGHT OUTER JOIN T2 c ON c.key+1=a.key select PREHOOK: type: QUERY POSTHOOK: query: explain FROM T1 a RIGHT OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing feature [Hint]. +Plan optimized by CBO. 
Vertex dependency in root stage Map 2 <- Map 1 (BROADCAST_EDGE) @@ -5850,31 +5874,37 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_11] - Group By Operator [GBY_9] (rows=1 width=24) + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_8] - Group By Operator [GBY_7] (rows=1 width=24) + PARTITION_ONLY_SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Map 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_6] + PARTITION_ONLY_SHUFFLE [RS_9] PartitionCols:rand() - Map Join Operator [MAPJOIN_14] (rows=1 width=33) - Conds:RS_2.UDFToDouble(key)=TS_1.(key + 1)(Right Outer),Output:["_col0","_col1","_col5"] - <-Map 1 [BROADCAST_EDGE] llap - BROADCAST [RS_2] - PartitionCols:UDFToDouble(key) - TableScan [TS_0] (rows=1 width=30) - default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-TableScan [TS_1] (rows=1 width=30) - default@t2,c,Tbl:COMPLETE,Col:NONE,Output:["key"] + Select Operator [SEL_7] (rows=1 width=33) + Output:["_col0","_col1","_col2"] + Map Join Operator [MAPJOIN_17] (rows=1 width=33) + Conds:RS_4.UDFToDouble(_col0)=SEL_3.(UDFToDouble(_col0) + 1.0)(Right Outer),Output:["_col0","_col1","_col2"] + <-Map 1 [BROADCAST_EDGE] llap + BROADCAST [RS_4] + PartitionCols:UDFToDouble(_col0) + Select Operator [SEL_1] (rows=1 width=30) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=1 width=30) + default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Select Operator [SEL_3] (rows=1 width=30) + Output:["_col0"] + TableScan [TS_2] (rows=1 width=30) + default@t2,c,Tbl:COMPLETE,Col:NONE,Output:["key"] PREHOOK: query: explain FROM T1 a FULL OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) PREHOOK: type: QUERY POSTHOOK: query: explain FROM T1 a FULL OUTER JOIN T2 c ON c.key+1=a.key select /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing feature [Hint]. +Plan optimized by CBO. 
Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) @@ -5886,34 +5916,40 @@ Stage-0 limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_11] - Group By Operator [GBY_9] (rows=1 width=24) + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_8] - Group By Operator [GBY_7] (rows=1 width=24) + PARTITION_ONLY_SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_6] + PARTITION_ONLY_SHUFFLE [RS_9] PartitionCols:rand() - Merge Join Operator [MERGEJOIN_12] (rows=1 width=33) - Conds:RS_2.UDFToDouble(key)=RS_3.(key + 1)(Outer),Output:["_col0","_col1","_col5"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_2] - PartitionCols:UDFToDouble(key) - TableScan [TS_0] (rows=1 width=30) - default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] - PartitionCols:(key + 1) - TableScan [TS_1] (rows=1 width=30) - default@t2,c,Tbl:COMPLETE,Col:NONE,Output:["key"] + Select Operator [SEL_7] (rows=1 width=33) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_15] (rows=1 width=33) + Conds:RS_4.UDFToDouble(_col0)=RS_5.(UDFToDouble(_col0) + 1.0)(Outer),Output:["_col0","_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_4] + PartitionCols:UDFToDouble(_col0) + Select Operator [SEL_1] (rows=1 width=30) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=1 width=30) + default@t1,a,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_5] + PartitionCols:(UDFToDouble(_col0) + 1.0) + Select Operator [SEL_3] (rows=1 width=30) + Output:["_col0"] + TableScan [TS_2] (rows=1 width=30) + default@t2,c,Tbl:COMPLETE,Col:NONE,Output:["key"] PREHOOK: query: explain select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key POSTHOOK: type: QUERY -Plan not optimized by CBO due to missing feature [Hint]. +Plan optimized by CBO. 
Vertex dependency in root stage Map 1 <- Map 4 (BROADCAST_EDGE) @@ -5925,23 +5961,29 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_11] - Group By Operator [GBY_9] (rows=1 width=16) + File Output Operator [FS_14] + Group By Operator [GBY_12] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_8] - Group By Operator [GBY_7] (rows=1 width=16) + PARTITION_ONLY_SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_6] + PARTITION_ONLY_SHUFFLE [RS_9] PartitionCols:rand() - Map Join Operator [MAPJOIN_14] (rows=1 width=33) - Conds:TS_0.(key + 1)=RS_3.UDFToDouble(key)(Left Outer),Output:["_col0","_col6"] - <-Map 4 [BROADCAST_EDGE] llap - BROADCAST [RS_3] - PartitionCols:UDFToDouble(key) - TableScan [TS_1] (rows=1 width=30) - default@t1,v,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-TableScan [TS_0] (rows=1 width=30) - default@t1,k,Tbl:COMPLETE,Col:NONE,Output:["key"] + Select Operator [SEL_7] (rows=1 width=33) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_17] (rows=1 width=33) + Conds:SEL_1.(UDFToDouble(_col0) + 1.0)=RS_5.UDFToDouble(_col0)(Left Outer),Output:["_col0","_col2"] + <-Map 4 [BROADCAST_EDGE] llap + BROADCAST [RS_5] + PartitionCols:UDFToDouble(_col0) + Select Operator [SEL_3] (rows=1 width=30) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=1 width=30) + default@t1,v,Tbl:COMPLETE,Col:NONE,Output:["key","val"] + <-Select Operator [SEL_1] (rows=1 width=30) + Output:["_col0"] + TableScan [TS_0] (rows=1 width=30) + default@t1,k,Tbl:COMPLETE,Col:NONE,Output:["key"] diff --git a/ql/src/test/results/clientpositive/llap/join_filters.q.out b/ql/src/test/results/clientpositive/llap/join_filters.q.out index b730d30..191c976 100644 --- a/ql/src/test/results/clientpositive/llap/join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/join_filters.q.out @@ -356,7 +356,7 @@ NULL NULL NULL NULL 12 35 NULL NULL NULL NULL 48 NULL NULL NULL NULL NULL NULL 40 AzUxen/yR7DlsL00zfSITA== -Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -407,7 +407,7 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 YaI1msgLVpfEx943Tfea/Q== -Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -1112,7 +1112,7 @@ NULL NULL NULL NULL 12 35 NULL NULL NULL NULL 48 NULL NULL NULL NULL NULL NULL 40 AzUxen/yR7DlsL00zfSITA== -Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT 
/*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -1163,7 +1163,7 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 YaI1msgLVpfEx943Tfea/Q== -Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git a/ql/src/test/results/clientpositive/llap/join_nulls.q.out b/ql/src/test/results/clientpositive/llap/join_nulls.q.out index c9876d3..5427b9f 100644 --- a/ql/src/test/results/clientpositive/llap/join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/join_nulls.q.out @@ -277,7 +277,7 @@ POSTHOOK: Input: default@myinput1 100 100 100 100 100 100 NULL 35 NULL 35 NULL 35 NULL NULL NULL NULL 48 NULL -Warning: Shuffle Join MERGEJOIN[7][tables = [a, b]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git a/ql/src/test/results/clientpositive/llap/skewjoin.q.out b/ql/src/test/results/clientpositive/llap/skewjoin.q.out index 1251b67..47a8f86 100644 --- a/ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -224,12 +224,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -240,12 +244,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -256,12 +264,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) 
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -272,12 +284,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -289,23 +305,19 @@ STAGE PLANS: Inner Join 1 to 2 Inner Join 2 to 3 keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + 3 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 3 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 99 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 99 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -366,12 +378,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value 
expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -382,12 +398,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -398,12 +418,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -414,12 +438,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -431,23 +459,19 @@ STAGE PLANS: Inner Join 1 to 2 Inner Join 2 to 3 keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + 3 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 3 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 99 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 99 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -499,14 +523,18 @@ STAGE PLANS: alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), val (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -515,14 +543,18 @@ STAGE PLANS: alias: c Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key + 1) is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: key (type: string) + Reduce Output Operator + key expressions: (UDFToDouble(_col0) + 1.0) (type: double) + sort order: + + Map-reduce partition columns: (UDFToDouble(_col0) + 1.0) (type: double) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -532,19 +564,23 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 UDFToDouble(key) (type: double) - 1 (key + 1) (type: double) - outputColumnNames: _col0, _col1, _col5 + 0 UDFToDouble(_col0) (type: double) + 1 (UDFToDouble(_col0) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 
24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1030,12 +1066,16 @@ STAGE PLANS: TableScan alias: k Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (key + 1) (type: double) - sort order: + - Map-reduce partition columns: (key + 1) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + Reduce Output Operator + key expressions: (UDFToDouble(_col0) + 1.0) (type: double) + sort order: + + Map-reduce partition columns: (UDFToDouble(_col0) + 1.0) (type: double) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1043,12 +1083,16 @@ STAGE PLANS: TableScan alias: v Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: val (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1058,19 +1102,23 @@ STAGE PLANS: condition map: Left Outer Join0 to 1 keys: - 0 (key + 1) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col6 + 0 (UDFToDouble(_col0) + 1.0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col6)) - mode: hash + Select Operator + expressions: hash(_col0) (type: int), hash(_col2) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col0 (type: bigint), _col1 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out index 8dee9ff..ddb7624 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out @@ -69,11 +69,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 4 @@ -84,11 +88,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -98,8 +106,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -185,11 +193,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 5 @@ -200,11 +212,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -214,8 +230,8 @@ STAGE PLANS: 
condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -333,11 +349,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 5 @@ -348,11 +368,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -362,30 +386,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count() keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE @@ -393,10 +414,10 @@ STAGE PLANS: aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -405,10 +426,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -632,13 +653,17 @@ STAGE PLANS: alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -649,8 +674,8 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1039,12 +1064,12 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key + 1) (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1058,12 +1083,12 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key + 1) (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -1180,13 +1205,17 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 6) (type: boolean) + 
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1197,8 +1226,8 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1293,13 +1322,17 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1310,8 +1343,8 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -1580,13 +1613,17 @@ STAGE PLANS: alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1597,8 +1634,8 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git 
a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out index 20766c2..df89454 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out @@ -61,15 +61,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -137,15 +141,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -211,25 +219,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false @@ -371,15 +375,19 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and 
key2 is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), key2 (type: int) - null sort order: aa - sort order: ++ - Map-reduce partition columns: key (type: int), key2 (type: int) + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col2 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -447,15 +455,19 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), key2 (type: int) - null sort order: aa - sort order: ++ - Map-reduce partition columns: key (type: int), key2 (type: int) + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col2 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -521,25 +533,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int), key2 (type: int) - 1 key (type: int), key2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false Reducer 3 Execution mode: llap Needs 
Tagging: false @@ -627,17 +635,21 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key2 is not null and key is not null) (type: boolean) + predicate: (key is not null and key2 is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key2 (type: int), key (type: int) - null sort order: aa - sort order: ++ - Map-reduce partition columns: key2 (type: int), key (type: int) + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col2 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -703,17 +715,21 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key2 is not null and key is not null) (type: boolean) + predicate: (key is not null and key2 is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key2 (type: int), key (type: int) - null sort order: aa - sort order: ++ - Map-reduce partition columns: key2 (type: int), key (type: int) + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col2 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -779,25 +795,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key2 (type: int), key (type: int) - 1 key2 (type: int), key (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 
(type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false @@ -887,15 +899,19 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: key2 (type: int) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: int) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -963,15 +979,19 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: string) - null sort order: aa - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: string) + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: key2 (type: int) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: int) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -1037,25 +1057,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int), value (type: string) - 1 key (type: int), value (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + 0 _col0 (type: int), _col2 (type: string) + 1 _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE 
Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out index a97a692..3ffef3b 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out @@ -197,11 +197,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 4 @@ -212,11 +216,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 5 @@ -227,11 +235,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 6 @@ -242,11 +254,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column 
stats: NONE Execution mode: llap LLAP IO: no inputs Map 7 @@ -257,11 +273,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 8 @@ -272,11 +292,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Map 9 @@ -287,11 +311,15 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -306,13 +334,13 @@ STAGE PLANS: Inner Join 0 to 5 Inner Join 0 to 6 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + 3 _col0 (type: int) + 4 _col0 (type: int) + 5 _col0 (type: int) + 6 _col0 (type: int) Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out index 7d7d96d..776b459 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out @@ -72,12 +72,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 
(type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -88,12 +92,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -104,12 +112,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -120,22 +132,18 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -178,12 +186,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: 
Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -191,12 +206,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -204,12 +226,19 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -217,25 +246,21 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -278,12 +303,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -291,12 +320,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -304,12 +337,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -320,22 +357,18 @@ STAGE PLANS: Left Outer Join0 to 1 Left Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 
Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -383,12 +416,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -396,12 +433,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -409,12 +450,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -425,22 +470,18 @@ STAGE PLANS: Left Outer Join0 to 1 Right Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data 
size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -489,12 +530,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -502,12 +547,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -515,12 +564,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -531,22 +584,18 @@ STAGE PLANS: Left Outer Join0 to 1 Outer Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -600,12 +649,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -613,12 +669,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -626,12 +689,19 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -642,22 
+712,18 @@ STAGE PLANS: Right Outer Join0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -702,12 +768,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -715,12 +785,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -728,12 +802,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -744,22 +822,18 @@ STAGE PLANS: Right Outer Join0 to 1 Left Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -806,12 +880,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -819,12 +897,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -832,12 +914,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 
(type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -848,22 +934,18 @@ STAGE PLANS: Right Outer Join0 to 1 Right Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -912,12 +994,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -925,12 +1011,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -938,12 +1028,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 
222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -954,22 +1048,18 @@ STAGE PLANS: Right Outer Join0 to 1 Outer Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1020,12 +1110,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1033,12 +1130,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap 
LLAP IO: no inputs Map 4 @@ -1046,12 +1150,19 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1059,25 +1170,21 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Right Outer Join0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1122,12 +1229,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1135,12 +1246,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 
206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1148,12 +1263,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1164,22 +1283,18 @@ STAGE PLANS: Outer Join 0 to 1 Left Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1231,12 +1346,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1244,12 +1363,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + 
Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1257,12 +1380,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1273,22 +1400,18 @@ STAGE PLANS: Outer Join 0 to 1 Right Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1337,12 +1460,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1350,12 +1477,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column 
stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1363,12 +1494,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1379,22 +1514,18 @@ STAGE PLANS: Outer Join 0 to 1 Outer Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out index 1abe220..0ddfdde 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out @@ -72,12 +72,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE 
- value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -88,12 +92,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -104,12 +112,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -120,22 +132,18 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -178,12 +186,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: 
key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -191,12 +206,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -204,12 +226,19 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -217,25 +246,21 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join0 to 1 + Inner Join 0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -278,12 +303,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -291,12 +320,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -304,12 +337,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -320,22 +357,18 @@ STAGE PLANS: Left Outer Join0 to 1 Left Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output 
Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -383,12 +416,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -396,12 +433,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -409,12 +450,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -425,22 +470,18 @@ STAGE PLANS: Left Outer Join0 to 1 Right Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 
File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -489,12 +530,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -502,12 +547,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -515,12 +564,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -531,22 +584,18 @@ STAGE PLANS: Left Outer Join0 to 1 Outer Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), 
_col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -600,12 +649,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -613,12 +669,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -626,12 +689,19 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: 
string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -642,22 +712,18 @@ STAGE PLANS: Right Outer Join0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -702,12 +768,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -715,12 +785,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -728,12 +802,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 
(type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -744,22 +822,18 @@ STAGE PLANS: Right Outer Join0 to 1 Left Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -806,12 +880,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -819,12 +897,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -832,12 +914,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value 
(type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -848,22 +934,18 @@ STAGE PLANS: Right Outer Join0 to 1 Right Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -912,12 +994,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -925,12 +1011,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -938,12 +1028,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + 
outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -954,22 +1048,18 @@ STAGE PLANS: Right Outer Join0 to 1 Outer Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1020,12 +1110,19 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1033,12 +1130,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1046,12 +1150,19 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1059,25 +1170,21 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Right Outer Join0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1122,12 +1229,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1135,12 +1246,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + 
outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1148,12 +1263,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1164,22 +1283,18 @@ STAGE PLANS: Outer Join 0 to 1 Left Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1231,12 +1346,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1244,12 +1363,16 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - 
sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1257,12 +1380,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1273,22 +1400,18 @@ STAGE PLANS: Outer Join 0 to 1 Right Outer Join1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1337,12 +1460,16 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1350,12 +1477,16 @@ STAGE PLANS: TableScan alias: b 
Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -1363,12 +1494,16 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1379,22 +1514,18 @@ STAGE PLANS: Outer Join 0 to 1 Outer Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out index bbf9b46..2c1ee96 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out @@ -84,12 +84,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 
Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -100,12 +104,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -115,22 +123,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-2 Dependency Collection @@ -1262,12 +1266,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1278,12 +1286,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - Reduce Output 
Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1293,22 +1305,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-2 Dependency Collection @@ -2456,12 +2464,16 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -2472,12 +2484,16 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no 
inputs Reducer 2 @@ -2487,22 +2503,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-2 Dependency Collection @@ -2566,12 +2578,16 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -2582,12 +2598,16 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2597,22 +2617,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-2 Dependency Collection @@ -2672,12 +2688,16 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -2688,12 +2708,16 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -2704,12 +2728,16 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2720,22 +2748,18 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 1 to 2 keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column 
stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/stats11.q.out b/ql/src/test/results/clientpositive/llap/stats11.q.out index 3eccf52..c7aed07 100644 --- a/ql/src/test/results/clientpositive/llap/stats11.q.out +++ b/ql/src/test/results/clientpositive/llap/stats11.q.out @@ -326,15 +326,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -398,15 +402,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -470,13 +478,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) 
+ expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -684,15 +692,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -756,15 +768,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: value (type: string) - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 6252 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -828,13 +844,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/table_access_keys_stats.q.out b/ql/src/test/results/clientpositive/llap/table_access_keys_stats.q.out index 48ea4ce..97a649e 100644 --- a/ql/src/test/results/clientpositive/llap/table_access_keys_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/table_access_keys_stats.q.out @@ -296,7 +296,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -Operator:JOIN_6 +Operator:JOIN_8 Table:default@t1 Keys:key Table:default@t2 diff --git a/ql/src/test/results/clientpositive/mapjoin1.q.out b/ql/src/test/results/clientpositive/mapjoin1.q.out index 5601bc0..e103eff 100644 --- a/ql/src/test/results/clientpositive/mapjoin1.q.out +++ 
b/ql/src/test/results/clientpositive/mapjoin1.q.out @@ -28,49 +28,47 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit @@ -117,49 +115,53 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - 
filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit @@ -208,31 +210,35 @@ SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 - 1 {(_col1.key > 200)} - keys: - 0 key (type: string) - 1 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -247,26 +253,22 @@ STAGE PLANS: Right Outer Join0 to 1 filter predicates: 0 - 1 {(_col1.key > 200)} + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -303,49 +305,47 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit @@ -392,49 +392,53 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit @@ -483,31 +487,35 @@ SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: - a + $hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - a + $hdt$_0:a TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - filter predicates: - 0 - 1 {(_col1.key > 200)} - keys: - 0 key (type: string) - 1 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -522,26 +530,22 @@ STAGE PLANS: Right Outer Join0 to 1 filter predicates: 0 - 1 {(_col1.key > 200)} + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/mapjoin_distinct.q.out b/ql/src/test/results/clientpositive/mapjoin_distinct.q.out index d49c172..525f1ef 100644 --- a/ql/src/test/results/clientpositive/mapjoin_distinct.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_distinct.q.out @@ -11,58 +11,66 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - d + $hdt$_0:c Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - d + $hdt$_0:c TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -78,7 +86,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -148,57 +156,65 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - d + $hdt$_0:c Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - d + $hdt$_0:c TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -262,53 +278,61 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-1 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-6 is a root stage + Stage-2 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: - d + $hdt$_0:c Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - d + $hdt$_0:c 
TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -324,7 +348,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -394,52 +418,60 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - d + $hdt$_0:c Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - d + $hdt$_0:c TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: 
COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/mapjoin_hook.q.out b/ql/src/test/results/clientpositive/mapjoin_hook.q.out index 8a6743c..a9f9be3 100644 --- a/ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -13,8 +13,8 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-4:MAPREDLOCAL -RUN: Stage-1:MAPRED +RUN: Stage-6:MAPREDLOCAL +RUN: Stage-2:MAPRED RUN: Stage-0:MOVE RUN: Stage-3:STATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) diff --git a/ql/src/test/results/clientpositive/pcs.q.out b/ql/src/test/results/clientpositive/pcs.q.out index c422a3d..dc2a476 100644 --- a/ql/src/test/results/clientpositive/pcs.q.out +++ b/ql/src/test/results/clientpositive/pcs.q.out @@ -1426,14 +1426,53 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) (type: boolean) + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: ds=2000-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1478,7 +1517,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.pcs_t1 name: default.pcs_t1 +#### A masked pattern was here #### Partition + base file name: ds=2000-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1523,7 +1564,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.pcs_t1 name: default.pcs_t1 +#### A masked pattern was here #### Partition + base file name: ds=2000-04-10 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1568,20 +1611,16 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.pcs_t1 name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + /pcs_t1/ds=2000-04-10 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: pcs_t1 - Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) (type: boolean) - Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 30 Data size: 240 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select ds from pcs_t1 where struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) PREHOOK: type: QUERY @@ -1605,14 +1644,53 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-0 - Fetch Operator - limit: -1 - Partition Description: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: pcs_t1 + Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key = 3) or ((struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) and ((key + 5) > 0))) (type: boolean) + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE 
+ File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### Partition + base file name: ds=2000-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1657,7 +1735,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.pcs_t1 name: default.pcs_t1 +#### A masked pattern was here #### Partition + base file name: ds=2000-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1702,7 +1782,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.pcs_t1 name: default.pcs_t1 +#### A masked pattern was here #### Partition + base file name: ds=2000-04-10 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1747,20 +1829,16 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.pcs_t1 name: default.pcs_t1 + Truncated Path -> Alias: + /pcs_t1/ds=2000-04-08 [pcs_t1] + /pcs_t1/ds=2000-04-09 [pcs_t1] + /pcs_t1/ds=2000-04-10 [pcs_t1] + + Stage: Stage-0 + Fetch Operator + limit: -1 Processor Tree: - TableScan - alias: pcs_t1 - Statistics: Num rows: 60 Data size: 480 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key = 3) or ((struct(((ds = '2000-04-08') or (key = 2)),key)) IN (const struct(true,2), const struct(false,3)) and ((key + 5) > 0))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select ds from pcs_t1 where key = 3 or (struct(ds='2000-04-08' or key = 2, key) in (struct(true,2), struct(false,3)) and key+5 > 0) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index e792381..477abb9 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -623,23 +623,19 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col1) (type: int), hash(_col2) (type: int) + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: 
hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Conditional Operator @@ -668,22 +664,18 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - outputColumnNames: _col1, _col2 - Select Operator - expressions: hash(_col1) (type: int), hash(_col2) (type: int) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -764,7 +756,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: (key is not null and UDFToDouble(substring(value, 5)) is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -779,16 +771,16 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double) + key expressions: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) sort order: ++ - Map-reduce partition columns: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 
1.0) (type: double) + Map-reduce partition columns: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: @@ -798,24 +790,20 @@ STAGE PLANS: handleSkewJoin: true keys: 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) - 1 _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double) + 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double) outputColumnNames: _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Conditional Operator @@ -845,21 +833,17 @@ STAGE PLANS: 0 reducesinkkey0 (type: string), reducesinkkey1 (type: double) 1 reducesinkkey0 (type: string), reducesinkkey1 (type: double) outputColumnNames: _col2, _col3 - Select Operator - expressions: hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -950,7 +934,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + predicate: ((key < 80) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -965,33 +949,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 80.0) and (UDFToDouble(key) < 100.0)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator condition map: @@ -1004,21 +988,17 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -1061,21 +1041,17 @@ STAGE PLANS: 1 reducesinkkey0 (type: string) 2 reducesinkkey0 (type: string) outputColumnNames: _col0, _col3 - Select Operator - expressions: hash(_col0) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -1139,21 +1115,17 @@ STAGE PLANS: 1 reducesinkkey0 (type: string) 2 reducesinkkey0 (type: string) outputColumnNames: _col0, _col3 - Select Operator - expressions: hash(_col0) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index 6333f2c..3ef9934 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -33,93 +33,99 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and (ds = '2008-04-08')) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: - b {} - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((ds = '2008-04-08') and key is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 1 Map 2 - Position of Big Table: 0 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: - b {} + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -152,13 +158,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -171,22 +178,20 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: - a {} - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + Map 3 Map Operator Tree: TableScan alias: b @@ -194,51 +199,58 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and (ds = '2008-04-08')) (type: boolean) + predicate: ((ds = '2008-04-08') and key is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 0 Map 1 - Position of Big Table: 1 + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types int:string:string - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - 
MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: - a {} + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -363,212 +375,214 @@ from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: srcbucket_mapjoin input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin + numFiles 2 numRows 0 - partition_columns ds - partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - partition_columns ds - partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + name default.srcbucket_mapjoin + numFiles 2 + numRows 0 + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin [a] + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string 
-#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: srcbucket_mapjoin + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 + partition_columns ds + partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin - numFiles 2 - numRows 0 - rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin { i32 key, string value} + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin - name: default.srcbucket_mapjoin + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin [a] + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + 
condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -716,14 +730,15 @@ from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -736,18 +751,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -799,12 +815,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -814,61 +825,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column 
stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -922,6 +891,53 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + 
column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 2 + numRows 464 + rawDataSize 8519 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -940,7 +956,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 2 numRows 464 rawDataSize 8519 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index 58fe4ab..e9c2733 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -101,38 +101,40 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -144,118 +146,71 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns 
ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 
1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -267,48 +222,95 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [a] + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -460,14 +462,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -480,18 +483,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -545,12 +549,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -560,61 +559,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - 
Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -668,6 +625,53 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 2 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ 
-686,7 +690,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 2 numRows 564 rawDataSize 10503 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} @@ -836,38 +840,40 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -879,53 +885,81 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { 
i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 58 Data size: 6124 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-09 + base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 + ds 2008-04-08 properties: bucket_count 2 bucket_field_name key @@ -967,132 +1001,104 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_2 name: default.srcbucket_mapjoin_part_2 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name 
default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2008-04-08 + base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [a] + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + 
Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 2 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1111,7 +1117,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 2 numRows 564 rawDataSize 10503 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index 06ce6f5..2598d34 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -125,38 +125,40 @@ from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -168,118 +170,71 
@@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - 
rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -291,48 +246,95 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key column.name.delimiter , columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: 
_col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -484,14 +486,15 @@ from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -504,18 +507,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -569,12 +573,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -584,61 +583,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: 
COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -692,6 +649,53 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 2 + numRows 564 + rawDataSize 10503 + serialization.ddl struct 
bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -710,7 +714,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 2 numRows 564 rawDataSize 10503 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index 94636f2..fb004e1 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -125,38 +125,40 @@ from srcbucket_mapjoin a join srcbucket_mapjoin b on a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -207,77 +209,30 @@ STAGE PLANS: name: default.srcbucket_mapjoin name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin [a] + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE 
Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -328,7 +283,54 @@ STAGE PLANS: name: default.srcbucket_mapjoin name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin [a] + /srcbucket_mapjoin [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct 
bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -468,14 +470,15 @@ from srcbucket_mapjoin a join srcbucket_mapjoin b on a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -488,18 +491,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -551,12 +555,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -566,61 +565,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name 
default.bucketmapjoin_tmp_result - numFiles 2 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -672,6 +629,53 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 2 + numRows 464 + rawDataSize 8519 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/join25.q.out b/ql/src/test/results/clientpositive/spark/join25.q.out index bd44295..05e5e70 100644 --- a/ql/src/test/results/clientpositive/spark/join25.q.out +++ b/ql/src/test/results/clientpositive/spark/join25.q.out @@ -35,10 +35,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: 
string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -54,28 +58,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/join26.q.out b/ql/src/test/results/clientpositive/spark/join26.q.out index 4a85a29..b1de987 100644 --- a/ql/src/test/results/clientpositive/spark/join26.q.out +++ b/ql/src/test/results/clientpositive/spark/join26.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x @@ -39,12 +39,16 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -98,22 +102,26 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 2 + Map 3 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -121,9 +129,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 @@ -132,11 +143,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -146,92 +159,93 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col6, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Position of Big Table: 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 
(type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 1 Map 2 + 2 Map 3 + Position of Big Table: 0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 + Select Operator + expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string + NumFilesPerFileSink: 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - name default.dest_j1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + name default.dest_j1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -239,12 +253,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 @@ -253,13 +264,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, 
string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -269,24 +278,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-0 Move Operator diff --git a/ql/src/test/results/clientpositive/spark/join27.q.out b/ql/src/test/results/clientpositive/spark/join27.q.out index 0dfad72..e10d2fb 100644 --- a/ql/src/test/results/clientpositive/spark/join27.q.out +++ b/ql/src/test/results/clientpositive/spark/join27.q.out @@ -35,10 +35,14 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 value (type: string) - 1 value (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -54,28 +58,32 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/join30.q.out b/ql/src/test/results/clientpositive/spark/join30.q.out index 8360400..3584f8c 100644 --- a/ql/src/test/results/clientpositive/spark/join30.q.out +++ b/ql/src/test/results/clientpositive/spark/join30.q.out @@ -33,10 +33,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -54,28 +58,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reducer 3 @@ -124,7 +132,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ] +POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, ] POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join36.q.out b/ql/src/test/results/clientpositive/spark/join36.q.out index 49167f3..b1717e0 100644 --- a/ql/src/test/results/clientpositive/spark/join36.q.out +++ 
b/ql/src/test/results/clientpositive/spark/join36.q.out @@ -67,18 +67,22 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan - alias: x + alias: y Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) + Select Operator + expressions: key (type: int), cnt (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -86,36 +90,40 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: int), cnt (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/join37.q.out b/ql/src/test/results/clientpositive/spark/join37.q.out index 231ca22..327e93e 100644 --- a/ql/src/test/results/clientpositive/spark/join37.q.out +++ b/ql/src/test/results/clientpositive/spark/join37.q.out @@ -35,10 +35,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -54,28 +58,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/join38.q.out b/ql/src/test/results/clientpositive/spark/join38.q.out index 5177dca..244928a 100644 --- a/ql/src/test/results/clientpositive/spark/join38.q.out +++ b/ql/src/test/results/clientpositive/spark/join38.q.out @@ -58,60 +58,72 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 2 Data size: 126 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key = 111) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 col11 (type: string) + predicate: (UDFToDouble(col11) = 111.0) (type: boolean) + Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col5 (type: string), col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 2 
+ Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2 Data size: 126 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (col11 = 111) (type: boolean) - Statistics: Num rows: 1 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 col11 (type: string) - outputColumnNames: _col1, _col10 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col1 (type: string), _col10 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + predicate: (UDFToDouble(key) = 111.0) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/join39.q.out b/ql/src/test/results/clientpositive/spark/join39.q.out index 5a5aeb4..183b14a 100644 --- a/ql/src/test/results/clientpositive/spark/join39.q.out +++ b/ql/src/test/results/clientpositive/spark/join39.q.out @@ -27,13 +27,13 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key <= 100) (type: boolean) + predicate: (UDFToDouble(key) <= 100.0) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -41,7 +41,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -50,24 +50,24 @@ STAGE PLANS: Spark #### A masked pattern was here #### 
Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/spark/join_reorder.q.out b/ql/src/test/results/clientpositive/spark/join_reorder.q.out index eae5426..fcd2167 100644 --- a/ql/src/test/results/clientpositive/spark/join_reorder.q.out +++ b/ql/src/test/results/clientpositive/spark/join_reorder.q.out @@ -69,53 +69,49 @@ STAGE PLANS: alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + value expressions: key (type: string), val (type: string) Map 3 Map Operator Tree: TableScan alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (UDFToDouble(_col0) + 1.0) (type: double) - sort order: + - Map-reduce partition columns: (UDFToDouble(_col0) + 1.0) (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + value expressions: key (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 UDFToDouble(_col0) (type: double) - 1 (UDFToDouble(_col0) + 1.0) (type: double) - outputColumnNames: _col0, _col1, _col2 
+ 0 UDFToDouble(key) (type: double) + 1 (key + 1) (type: double) + outputColumnNames: _col0, _col1, _col5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -255,60 +251,48 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: val (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Outer Join0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col5 Statistics: Num rows: 1 Data size: 33 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 33 Basic stats: 
COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string) + value expressions: _col0 (type: string), _col5 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -316,11 +300,11 @@ STAGE PLANS: Right Outer Join0 to 1 keys: 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + 1 val (type: string) + outputColumnNames: _col0, _col1, _col5, _col11 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/join_reorder4.q.out b/ql/src/test/results/clientpositive/spark/join_reorder4.q.out index 83ea4c5..8ba636c 100644 --- a/ql/src/test/results/clientpositive/spark/join_reorder4.q.out +++ b/ql/src/test/results/clientpositive/spark/join_reorder4.q.out @@ -51,16 +51,31 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val1 (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -68,14 +83,13 @@ STAGE PLANS: Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val2 (type: string) + Map 4 Map Operator Tree: TableScan alias: c @@ -83,52 +97,35 @@ STAGE PLANS: Filter Operator predicate: key3 is not null (type: boolean) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key1 is not null (type: boolean) - 
Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: val3 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -154,16 +151,31 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val1 (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -171,14 +183,13 @@ STAGE PLANS: Filter 
Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val2 (type: string) + Map 4 Map Operator Tree: TableScan alias: c @@ -186,52 +197,35 @@ STAGE PLANS: Filter Operator predicate: key3 is not null (type: boolean) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: val3 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ 
-257,16 +251,31 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val1 (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -274,14 +283,13 @@ STAGE PLANS: Filter Operator predicate: key2 is not null (type: boolean) Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: val2 (type: string) + Map 4 Map Operator Tree: TableScan alias: c @@ -289,52 +297,35 @@ STAGE PLANS: Filter Operator predicate: key3 is not null (type: boolean) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key1 (type: string) - 1 key2 (type: string) - 2 key3 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + 
+ Map-reduce partition columns: key3 (type: string) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: val3 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key1 (type: string) + 1 key2 (type: string) + 2 key3 (type: string) + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 66 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/mapjoin1.q.out b/ql/src/test/results/clientpositive/spark/mapjoin1.q.out index ca489e4..aa96fc5 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin1.q.out @@ -42,13 +42,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -61,22 +62,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -136,13 +134,17 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -155,22 +157,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -227,18 +229,22 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(_col1.key > 200)} - keys: - 0 key (type: string) - 1 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -246,7 +252,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -260,28 +266,24 @@ STAGE PLANS: Right Outer Join0 to 1 filter predicates: 0 - 1 {(_col1.key > 200)} + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -332,13 +334,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -351,22 +354,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -426,13 +426,17 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -445,22 +449,22 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + filter predicates: + 0 + 1 {((UDFToDouble(_col0) * UDFToDouble(10)) < 
UDFToDouble('1000'))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -517,18 +521,22 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - filter predicates: - 0 - 1 {(_col1.key > 200)} - keys: - 0 key (type: string) - 1 _col0 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + filter predicates: + 0 + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -536,7 +544,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: src @@ -550,28 +558,24 @@ STAGE PLANS: Right Outer Join0 to 1 filter predicates: 0 - 1 {(_col1.key > 200)} + 1 {(UDFToDouble(_col1.key) > UDFToDouble(200))} keys: - 0 key (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 0 Map 2 + 0 Map 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out index 6b3fcc0..bdae7a5 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out @@ -28,10 +28,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce 
Local Work @@ -50,26 +54,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reducer 2 @@ -163,10 +171,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -184,26 +196,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1100 
Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reducer 2 @@ -285,10 +301,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -307,21 +327,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reducer 2 @@ -415,10 +439,14 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) Local Work: Map Reduce Local Work @@ -436,21 +464,25 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + input vertices: + 1 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reducer 2 diff --git a/ql/src/test/results/clientpositive/spark/skewjoin.q.out b/ql/src/test/results/clientpositive/spark/skewjoin.q.out index dade580..fdcb4bb 100644 --- a/ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -664,23 +664,19 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + outputColumnNames: _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col1) (type: int), hash(_col2) (type: int) + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Conditional Operator @@ -712,22 +708,18 @@ STAGE PLANS: keys: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - outputColumnNames: _col1, _col2 - Select Operator - expressions: hash(_col1) (type: int), hash(_col2) (type: int) + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -819,7 +811,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: (key is not null and UDFToDouble(substring(value, 5)) is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: 
string), value (type: string) @@ -836,16 +828,16 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) + predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double) + key expressions: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) sort order: ++ - Map-reduce partition columns: _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double) + Map-reduce partition columns: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 @@ -856,24 +848,20 @@ STAGE PLANS: handleSkewJoin: true keys: 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) - 1 _col0 (type: string), (UDFToDouble(substring(_col1, 5)) + 1.0) (type: double) + 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double) outputColumnNames: _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Conditional Operator @@ -906,21 +894,17 @@ STAGE PLANS: 0 reducesinkkey0 (type: string), reducesinkkey1 (type: double) 1 reducesinkkey0 (type: string), reducesinkkey1 (type: double) outputColumnNames: _col2, _col3 - Select Operator - expressions: hash(_col2) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -1022,7 +1006,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + predicate: ((key < 80) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) @@ -1039,35 +1023,35 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 80.0) and (UDFToDouble(key) < 100.0)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -1081,21 +1065,17 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: 
Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-5 Conditional Operator @@ -1141,21 +1121,17 @@ STAGE PLANS: 1 reducesinkkey0 (type: string) 2 reducesinkkey0 (type: string) outputColumnNames: _col0, _col3 - Select Operator - expressions: hash(_col0) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -1228,21 +1204,17 @@ STAGE PLANS: 1 reducesinkkey0 (type: string) 2 reducesinkkey0 (type: string) outputColumnNames: _col0, _col3 - Select Operator - expressions: hash(_col0) (type: int), hash(_col3) (type: int) + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out index 68cd0fb..b5cef85 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out @@ -56,35 +56,64 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is 
not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -143,40 +172,69 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket 
Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) @@ -188,7 +246,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) @@ -262,45 +320,71 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 
Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE @@ -308,21 +392,21 @@ STAGE PLANS: aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -381,7 +465,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -396,24 +481,48 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - 
Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -484,35 +593,64 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + predicate: ((key < 8) 
and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -607,7 +745,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -622,24 +761,48 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -722,7 +885,8 @@ STAGE PLANS: Stage: 
Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -737,24 +901,48 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -808,13 +996,15 @@ select /*+mapjoin(subq1)*/ count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -822,58 +1012,53 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key + 1) (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: 
int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (key + 1) (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -932,7 +1117,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -941,26 +1127,54 @@ STAGE PLANS: alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1013,7 +1227,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1028,24 +1243,48 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 
1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1108,7 +1347,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1123,26 +1363,67 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort 
order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1221,35 +1502,64 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index 5372615..d652926 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -46,7 +46,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 
(PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -59,30 +60,19 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -138,7 +128,106 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows 500 + rawDataSize 5312 + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows 500 + rawDataSize 5312 + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -263,7 +352,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -276,30 +366,19 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int), key2 (type: int) - 1 key (type: int), key2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 
7218 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col2 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -355,7 +434,106 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and key2 is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + column.name.delimiter , + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + column.name.delimiter , + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + tag: -1 + TopN: 10 + TopN 
Hash Memory Usage: 0.1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -428,7 +606,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -439,32 +618,21 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key2 is not null and key is not null) (type: boolean) + predicate: (key is not null and key2 is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key2 (type: int), key (type: int) - 1 key2 (type: int), key (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col2 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -520,7 +688,106 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and key2 is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was 
here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + column.name.delimiter , + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + column.name.delimiter , + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows 500 + rawDataSize 7218 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -586,43 +853,46 @@ POSTHOOK: query: EXPLAIN EXTENDED SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 key (type: int), value (type: string) - 1 key 
(type: int), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table2 + base file name: test_table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -635,11 +905,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 numRows 500 rawDataSize 7218 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -658,76 +928,49 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table2 + name default.test_table1 numFiles 16 numRows 500 rawDataSize 7218 - serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table2 - name: default.test_table2 + name: default.test_table1 + name: default.test_table1 Truncated Path -> Alias: - /test_table2 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /test_table1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 key (type: int), value (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - BucketMapJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: 
_col0 (type: int) - null sort order: a - sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE - tag: -1 - TopN: 10 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work - Bucket Mapjoin Context: - Alias Bucket File Name Mapping: -#### A masked pattern was here #### - Alias Bucket Output File Name Mapping: -#### A masked pattern was here #### + Select Operator + expressions: key (type: int), key2 (type: int), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table1 + base file name: test_table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -740,11 +983,11 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 numRows 500 rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 @@ -763,21 +1006,42 @@ STAGE PLANS: columns.comments columns.types int:int:string #### A masked pattern was here #### - name default.test_table1 + name default.test_table2 numFiles 16 numRows 500 rawDataSize 7218 - serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 7718 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table1 - name: default.test_table1 + name: default.test_table2 + name: default.test_table2 Truncated Path -> Alias: - /test_table1 [a] + /test_table2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: string) + 1 _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + tag: -1 + TopN: 10 + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out 
b/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out index 25e29f6..18e6e85 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out @@ -184,7 +184,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -195,34 +196,147 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - Inner Join 0 to 4 - Inner Join 0 to 5 - Inner Join 0 to 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) 
+ outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: g + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + Inner Join 0 to 4 + Inner Join 0 to 5 + Inner Join 0 to 6 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + 3 _col0 (type: int) + 4 _col0 (type: int) + 5 _col0 (type: int) + 6 _col0 (type: int) + Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out index 476971b..0efc8ab 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -59,37 +59,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), 
Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -122,34 +168,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num 
rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -182,34 +277,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition 
columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -247,34 +382,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -313,34 +488,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Outer Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -384,34 +599,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: 
false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -446,34 +710,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - 
File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -510,34 +814,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 
Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -576,34 +920,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Outer Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -644,34 +1028,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 
outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -706,34 +1139,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE 
- Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Left Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Left Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -775,34 +1248,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 2 Data size: 
222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Right Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Right Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -841,34 +1354,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + 
TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out index be2ff37..9f50e9f 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -59,37 +59,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: 
NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -122,34 +168,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -182,34 +277,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 
2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -247,34 +382,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 
(PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -313,34 +488,74 @@ STAGE DEPENDENCIES: STAGE 
PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Left Outer Join0 to 1 - Outer Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ 
-384,34 +599,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -446,34 +710,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: 
int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -510,34 +814,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + keys: + 0 
_col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -576,34 +920,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Right Outer Join0 to 1 - Outer Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right 
Outer Join0 to 1 + Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -644,34 +1028,83 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value 
(type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -706,34 +1139,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Left Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Left Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -775,34 +1248,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Right Outer Join1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Right Outer Join1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -841,34 +1354,74 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c 
+ Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out index 5ae3845..7cf3cf7 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out @@ -70,36 +70,64 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -1214,6 +1242,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1224,26 +1254,52 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 4812 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -2374,36 +2430,64 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: a + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 1000) (type: boolean) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan alias: b Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -2450,6 +2534,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2460,26 +2546,52 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 1000) (type: boolean) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -2523,6 +2635,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 
Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2533,27 +2647,71 @@ STAGE PLANS: Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 1000) (type: boolean) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 1000) (type: boolean) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out index 923441a..90d4cbb 100644 --- a/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out @@ -296,7 +296,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -Operator:JOIN_6 +Operator:JOIN_8 Table:default@t1 Keys:key Table:default@t2 -- 2.10.1 (Apple Git-78)
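
Note on the plan diffs above: the affected golden files all show the same shape of change in the Spark plans. The removed lines carry a single Sorted Merge Bucket Map Join Operator evaluated under the scan of alias b, with join keys expressed directly as key (type: int); the added lines give every input table its own map vertex that projects key/value to _col0/_col1, shuffles on _col0 over PARTITION-LEVEL SORT edges, and joins in a reduce-side Join Operator. A minimal HiveQL sketch (illustrative only, not part of the patch) of a query shape matching the first hunk, assuming tables a, b and c exist with the (key INT, value STRING) schema and the bucketed/sorted layout implied by the old SMB plan:

    -- Illustrative only; table names, schema and join order are assumptions
    -- inferred from the aliases and column types printed in the plans above.
    EXPLAIN
    SELECT a.key, a.value, b.key, b.value, c.key, c.value
    FROM a
    LEFT OUTER JOIN b ON a.key = b.key
    RIGHT OUTER JOIN c ON b.key = c.key;

The two-table smb_mapjoin_6.q.out hunks follow the same pattern for a plain inner join of a and b, and the operator-id change from JOIN_6 to JOIN_8 in table_access_keys_stats.q.out is consistent with the extra Select Operators now appearing below the join.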