diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java index 14eb3a6..e400896 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSemiJoinRule.java @@ -20,6 +20,7 @@ import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Join; @@ -37,6 +38,7 @@ import com.google.common.collect.Lists; +import java.util.ArrayList; import java.util.List; /** @@ -84,6 +86,11 @@ private HiveSemiJoinRule(RelBuilderFactory relBuilder) { // By the way, neither a super-set nor a sub-set would work. return; } + if(join.getJoinType() == JoinRelType.LEFT) { + // since for LEFT join we are only interested in rows from LEFT we can get rid of right side + call.transformTo(call.builder().push(left).project(project.getProjects(), project.getRowType().getFieldNames()).build()); + return; + } if (join.getJoinType() != JoinRelType.INNER) { return; } @@ -102,7 +109,23 @@ private HiveSemiJoinRule(RelBuilderFactory relBuilder) { final RexNode newCondition = RelOptUtil.createEquiJoinCondition(left, joinInfo.leftKeys, newRight, newRightKeys, rexBuilder); - RelNode semi = call.builder().push(left).push(aggregate.getInput()).semiJoin(newCondition).build(); + + RelNode semi = null; + //HIVE-15458: we need to add a Project on top of Join since SemiJoin with Join as it's right input + // is not expected further down the pipeline. see jira for more details + if(aggregate.getInput() instanceof HepRelVertex + && ((HepRelVertex)aggregate.getInput()).getCurrentRel() instanceof Join) { + Join rightJoin = (Join)(((HepRelVertex)aggregate.getInput()).getCurrentRel()); + List projects = new ArrayList<>(); + for(int i=0; i 'val_9') - TableScan [TS_2] (rows=500 width=178) - default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=250 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=250 width=178) - Output:["_col0","_col1"],keys:key, value - TableScan [TS_5] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"} + Group By Operator [GBY_16] (rows=1 width=178) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_14] (rows=1 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_30] (rows=1 width=178) + Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_4] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_28] (rows=166 width=178) + predicate:(value > 'val_9') + TableScan [TS_2] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=250 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:_col0, _col1 + Group By Operator [GBY_7] (rows=250 width=178) + Output:["_col0","_col1"],keys:key, value + TableScan [TS_5] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"} PREHOOK: query: explain select * from (select * @@ -2066,59 +2062,55 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_23] - Merge Join Operator [MERGEJOIN_33] (rows=1 width=178) - Conds:RS_19._col0, _col1=RS_20._col0, _col1(Inner),Output:["_col0","_col1"] + File Output Operator [FS_21] + Merge Join Operator [MERGEJOIN_31] (rows=1 width=178) + Conds:RS_17._col0, _col1=RS_18._col0, _col1(Left Semi),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col0, _col1 Select Operator [SEL_1] (rows=500 width=178) Output:["_col0","_col1"] TableScan [TS_0] (rows=500 width=178) default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + <-Reducer 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=1 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_16] - PartitionCols:_col0, _col1 - Group By Operator [GBY_15] (rows=1 width=178) - Output:["_col0","_col1"],keys:_col2, _col3 - Merge Join Operator [MERGEJOIN_32] (rows=1 width=178) - Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"] - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col0, _col1 - Select Operator [SEL_4] (rows=166 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_30] (rows=166 width=178) - predicate:(value > 'val_9') - TableScan [TS_2] (rows=500 width=178) - default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=250 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:_col0, _col1 - Group By Operator [GBY_7] (rows=250 width=178) - Output:["_col0","_col1"],keys:key, value - TableScan [TS_5] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Group By Operator [GBY_16] (rows=1 width=178) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_14] (rows=1 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_30] (rows=1 width=178) + Conds:RS_11._col0, _col1=RS_12._col0, _col1(Inner),Output:["_col2","_col3"] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_4] (rows=166 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_28] (rows=166 width=178) + predicate:(value > 'val_9') + TableScan [TS_2] (rows=500 width=178) + default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=250 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:_col0, _col1 + Group By Operator [GBY_7] (rows=250 width=178) + Output:["_col0","_col1"],keys:key, value + TableScan [TS_5] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select * from src_cbo @@ -2131,40 +2123,36 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_14] - Merge Join Operator [MERGEJOIN_19] (rows=166 width=178) - Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"] + File Output Operator [FS_12] + Merge Join Operator [MERGEJOIN_17] (rows=166 width=178) + Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] + SHUFFLE [RS_8] PartitionCols:_col0 Select Operator [SEL_2] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=166 width=178) + Filter Operator [FIL_15] (rows=166 width=178) predicate:(key > '9') TableScan [TS_0] (rows=500 width=178) default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] + <-Map 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_9] PartitionCols:_col0 - Group By Operator [GBY_8] (rows=69 width=87) - Output:["_col0"],keys:KEY._col0 - <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] - PartitionCols:_col0 - Group By Operator [GBY_6] (rows=69 width=87) - Output:["_col0"],keys:key - Filter Operator [FIL_18] (rows=166 width=87) - predicate:(key > '9') - TableScan [TS_3] (rows=500 width=87) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Group By Operator [GBY_7] (rows=69 width=87) + Output:["_col0"],keys:_col0 + Select Operator [SEL_5] (rows=166 width=87) + Output:["_col0"] + Filter Operator [FIL_16] (rows=166 width=87) + predicate:(key > '9') + TableScan [TS_3] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey @@ -2179,41 +2167,40 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 13 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 10 <- Map 12 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_46] - Select Operator [SEL_45] (rows=5 width=8) + File Output Operator [FS_44] + Select Operator [SEL_43] (rows=5 width=8) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_67] (rows=5 width=8) - Conds:RS_42._col1, _col4=RS_43._col0, _col1(Inner),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_65] (rows=5 width=8) + Conds:RS_40._col1, _col4=RS_41._col0, _col1(Left Semi),Output:["_col0","_col3"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_42] + SHUFFLE [RS_40] PartitionCols:_col1, _col4 - Merge Join Operator [MERGEJOIN_64] (rows=5 width=16) - Conds:RS_39._col0=RS_40._col1(Inner),Output:["_col0","_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_62] (rows=5 width=16) + Conds:RS_35._col0=RS_36._col1(Inner),Output:["_col0","_col1","_col3","_col4"] <-Map 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_40] + SHUFFLE [RS_36] PartitionCols:_col1 Select Operator [SEL_9] (rows=17 width=16) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_60] (rows=17 width=16) + Filter Operator [FIL_58] (rows=17 width=16) predicate:((l_linenumber = 1) and l_partkey is not null) TableScan [TS_7] (rows=100 width=16) default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] + SHUFFLE [RS_35] PartitionCols:_col0 Group By Operator [GBY_5] (rows=50 width=4) Output:["_col0"],keys:KEY._col0 @@ -2222,66 +2209,63 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_3] (rows=50 width=4) Output:["_col0"],keys:l_partkey - Filter Operator [FIL_59] (rows=100 width=4) + Filter Operator [FIL_57] (rows=100 width=4) predicate:l_partkey is not null TableScan [TS_0] (rows=100 width=4) default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_43] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] PartitionCols:_col0, _col1 - Group By Operator [GBY_37] (rows=4 width=8) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=4 width=8) - Output:["_col0","_col1"],keys:_col0, _col3 - Merge Join Operator [MERGEJOIN_66] (rows=14 width=8) - Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] - PartitionCols:_col1 - Select Operator [SEL_12] (rows=14 width=95) - Output:["_col0","_col1"] - Filter Operator [FIL_61] (rows=14 width=96) - predicate:(l_shipmode = 'AIR') - TableScan [TS_10] (rows=100 width=96) - default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] - <-Reducer 12 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] - PartitionCols:_col0 - Group By Operator [GBY_29] (rows=3 width=4) - Output:["_col0"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_28] - PartitionCols:_col0 - Group By Operator [GBY_27] (rows=3 width=4) - Output:["_col0"],keys:_col2 - Merge Join Operator [MERGEJOIN_65] (rows=34 width=4) - Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"] - <-Map 13 [SIMPLE_EDGE] llap - SHUFFLE [RS_24] - PartitionCols:_col0 - Select Operator [SEL_22] (rows=100 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_63] (rows=100 width=8) - predicate:l_partkey is not null - TableScan [TS_20] (rows=100 width=8) - default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Group By Operator [GBY_18] (rows=50 width=4) - Output:["_col0"],keys:KEY._col0 - <-Map 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=50 width=4) - Output:["_col0"],keys:l_partkey - Filter Operator [FIL_62] (rows=100 width=4) - predicate:l_partkey is not null - TableScan [TS_13] (rows=100 width=4) - default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"] + Group By Operator [GBY_39] (rows=4 width=8) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_34] (rows=14 width=8) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_64] (rows=14 width=8) + Conds:RS_31._col1=RS_32._col0(Inner),Output:["_col0","_col3"] + <-Map 6 [SIMPLE_EDGE] llap + SHUFFLE [RS_31] + PartitionCols:_col1 + Select Operator [SEL_12] (rows=14 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_59] (rows=14 width=96) + predicate:(l_shipmode = 'AIR') + TableScan [TS_10] (rows=100 width=96) + default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_32] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=3 width=4) + Output:["_col0"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_28] + PartitionCols:_col0 + Group By Operator [GBY_27] (rows=3 width=4) + Output:["_col0"],keys:_col2 + Merge Join Operator [MERGEJOIN_63] (rows=34 width=4) + Conds:RS_23._col0=RS_24._col0(Inner),Output:["_col2"] + <-Map 12 [SIMPLE_EDGE] llap + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_22] (rows=100 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_61] (rows=100 width=8) + predicate:l_partkey is not null + TableScan [TS_20] (rows=100 width=8) + default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey","l_linenumber"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0 + Group By Operator [GBY_18] (rows=50 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=50 width=4) + Output:["_col0"],keys:l_partkey + Filter Operator [FIL_60] (rows=100 width=4) + predicate:l_partkey is not null + TableScan [TS_13] (rows=100 width=4) + default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_partkey"] PREHOOK: query: explain select key, value, count(*) from src_cbo b @@ -2300,23 +2284,22 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 llap - File Output Operator [FS_36] - Merge Join Operator [MERGEJOIN_49] (rows=34 width=186) - Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col0","_col1","_col2"] + File Output Operator [FS_33] + Merge Join Operator [MERGEJOIN_46] (rows=34 width=186) + Conds:RS_29._col2=RS_30._col0(Left Semi),Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_32] + SHUFFLE [RS_29] PartitionCols:_col2 - Filter Operator [FIL_42] (rows=83 width=186) + Filter Operator [FIL_39] (rows=83 width=186) predicate:_col2 is not null Group By Operator [GBY_16] (rows=83 width=186) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 @@ -2325,14 +2308,14 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_14] (rows=83 width=186) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Merge Join Operator [MERGEJOIN_48] (rows=166 width=178) + Merge Join Operator [MERGEJOIN_45] (rows=166 width=178) Conds:RS_10._col0=RS_11._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0 Select Operator [SEL_2] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_43] (rows=166 width=178) + Filter Operator [FIL_40] (rows=166 width=178) predicate:(key > '8') TableScan [TS_0] (rows=500 width=178) default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -2346,35 +2329,32 @@ Stage-0 PartitionCols:_col0 Group By Operator [GBY_6] (rows=69 width=87) Output:["_col0"],keys:key - Filter Operator [FIL_44] (rows=166 width=87) + Filter Operator [FIL_41] (rows=166 width=87) predicate:(key > '8') TableScan [TS_3] (rows=500 width=87) default@src_cbo,src_cbo,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_33] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_30] (rows=34 width=8) - Output:["_col0"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_29] - PartitionCols:_col0 - Group By Operator [GBY_28] (rows=34 width=8) - Output:["_col0"],keys:_col1 - Filter Operator [FIL_45] (rows=69 width=8) - predicate:_col1 is not null - Select Operator [SEL_47] (rows=69 width=8) - Output:["_col1"] - Group By Operator [GBY_24] (rows=69 width=95) - Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Group By Operator [GBY_22] (rows=69 width=95) - Output:["_col0","_col1"],aggregations:["count()"],keys:key - Filter Operator [FIL_46] (rows=166 width=87) - predicate:(key > '9') - TableScan [TS_19] (rows=500 width=87) - default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Group By Operator [GBY_28] (rows=34 width=8) + Output:["_col0"],keys:_col0 + Select Operator [SEL_26] (rows=69 width=8) + Output:["_col0"] + Filter Operator [FIL_42] (rows=69 width=8) + predicate:_col1 is not null + Select Operator [SEL_44] (rows=69 width=8) + Output:["_col1"] + Group By Operator [GBY_24] (rows=69 width=95) + Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 + <-Map 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0 + Group By Operator [GBY_22] (rows=69 width=95) + Output:["_col0","_col1"],aggregations:["count()"],keys:key + Filter Operator [FIL_43] (rows=166 width=87) + predicate:(key > '9') + TableScan [TS_19] (rows=500 width=87) + default@src_cbo,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] PREHOOK: query: explain select p_mfgr, p_name, avg(p_size) from part @@ -2392,20 +2372,19 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_23] - Merge Join Operator [MERGEJOIN_28] (rows=6 width=227) - Conds:RS_19._col1=RS_20._col0(Inner),Output:["_col0","_col1","_col2"] + File Output Operator [FS_21] + Merge Join Operator [MERGEJOIN_26] (rows=6 width=227) + Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_19] + SHUFFLE [RS_17] PartitionCols:_col1 Select Operator [SEL_6] (rows=13 width=227) Output:["_col0","_col1","_col2"] @@ -2416,33 +2395,28 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_3] (rows=13 width=295) Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr - Filter Operator [FIL_26] (rows=26 width=223) + Filter Operator [FIL_24] (rows=26 width=223) predicate:p_name is not null TableScan [TS_0] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] - <-Reducer 6 [SIMPLE_EDGE] llap - SHUFFLE [RS_20] + <-Reducer 5 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] PartitionCols:_col0 - Group By Operator [GBY_17] (rows=13 width=184) - Output:["_col0"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=13 width=184) - Output:["_col0"],keys:_col0 - Select Operator [SEL_11] (rows=26 width=491) - Output:["_col0"] - Filter Operator [FIL_27] (rows=26 width=491) - predicate:first_value_window_0 is not null - PTF Operator [PTF_10] (rows=26 width=491) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_9] (rows=26 width=491) - Output:["_col1","_col2","_col5"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] - PartitionCols:p_mfgr - TableScan [TS_7] (rows=26 width=223) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] + Group By Operator [GBY_16] (rows=13 width=184) + Output:["_col0"],keys:_col0 + Select Operator [SEL_11] (rows=26 width=184) + Output:["_col0"] + Filter Operator [FIL_25] (rows=26 width=491) + predicate:first_value_window_0 is not null + PTF Operator [PTF_10] (rows=26 width=491) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] + Select Operator [SEL_9] (rows=26 width=491) + Output:["_col1","_col2","_col5"] + <-Map 4 [SIMPLE_EDGE] llap + SHUFFLE [RS_8] + PartitionCols:p_mfgr + TableScan [TS_7] (rows=26 width=223) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] PREHOOK: query: explain select * from src_cbo diff --git a/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out b/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out index a11bbc4..611d929 100644 --- a/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/leftsemijoin.q.out @@ -108,3 +108,209 @@ POSTHOOK: query: drop table things POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@things POSTHOOK: Output: default@things +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 676 Data size: 81796 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +ECONOMY BRUSHED COPPER +ECONOMY BURNISHED STEEL +ECONOMY PLATED COPPER +ECONOMY POLISHED STEEL +LARGE BRUSHED BRASS +LARGE BRUSHED STEEL +LARGE BURNISHED STEEL +MEDIUM ANODIZED COPPER +MEDIUM BURNISHED BRASS +MEDIUM BURNISHED COPPER +MEDIUM BURNISHED TIN +MEDIUM BURNISHED TIN +PROMO ANODIZED TIN +PROMO BURNISHED NICKEL +PROMO PLATED TIN +PROMO PLATED TIN +PROMO POLISHED STEEL +SMALL BRUSHED BRASS +SMALL PLATED BRASS +SMALL PLATED STEEL +SMALL POLISHED NICKEL +STANDARD ANODIZED STEEL +STANDARD ANODIZED TIN +STANDARD BURNISHED TIN +STANDARD PLATED TIN +STANDARD POLISHED STEEL +PREHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: part + Select Operator + expressions: p_type (type: string) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +ECONOMY BRUSHED COPPER +ECONOMY BURNISHED STEEL +ECONOMY PLATED COPPER +ECONOMY POLISHED STEEL +LARGE BRUSHED BRASS +LARGE BRUSHED STEEL +LARGE BURNISHED STEEL +MEDIUM ANODIZED COPPER +MEDIUM BURNISHED BRASS +MEDIUM BURNISHED COPPER +MEDIUM BURNISHED TIN +MEDIUM BURNISHED TIN +PROMO ANODIZED TIN +PROMO BURNISHED NICKEL +PROMO PLATED TIN +PROMO PLATED TIN +PROMO POLISHED STEEL +SMALL BRUSHED BRASS +SMALL PLATED BRASS +SMALL PLATED STEEL +SMALL POLISHED NICKEL +STANDARD ANODIZED STEEL +STANDARD ANODIZED TIN +STANDARD BURNISHED TIN +STANDARD PLATED TIN +STANDARD POLISHED STEEL diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index f092967..495ad09 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -178,7 +178,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@src1 #### A masked pattern was here #### -{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n (select cint from alltypesorc b\n where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = (. (tok_table_or_col $hdt$_1) key))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]} 311 val_311 Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select key, value from src1 diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 14b592b..23d68ab 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -25,10 +25,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +65,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b @@ -88,7 +87,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -112,30 +111,21 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col2, _col3 Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col2 (type: string), _col3 (type: string) - mode: hash + Select Operator + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -291,10 +281,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -330,7 +319,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b @@ -352,7 +341,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -376,30 +365,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: string) - mode: hash + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 63432a0..155a901 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -17,8 +17,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -49,16 +48,20 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - mode: hash + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -66,7 +69,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -79,19 +82,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 69 Data size: 6003 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -149,10 +139,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -191,7 +180,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: b @@ -213,7 +202,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -237,30 +226,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash + Select Operator + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -338,10 +318,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -382,7 +361,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) @@ -460,19 +439,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -843,7 +809,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -887,14 +853,14 @@ STAGE PLANS: Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) - mode: complete + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -989,8 +955,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 4 <- Map 6 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1043,10 +1008,10 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: l_orderkey - Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: l_orderkey (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE @@ -1092,7 +1057,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -1109,19 +1074,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1518,10 +1470,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col5 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1564,24 +1516,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: string), _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -2417,10 +2361,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2457,7 +2400,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -2479,10 +2422,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2505,10 +2448,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col1 (type: string), _col4 (type: int) - outputColumnNames: _col3, _col1, _col4 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col3 (type: int), _col1 (type: string), _col4 (type: int) + keys: _col0 (type: int), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE @@ -2517,24 +2460,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 129 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2601,10 +2527,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2641,7 +2566,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -2663,7 +2588,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -2687,30 +2612,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: string), _col2 (type: string) - mode: hash + Select Operator + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2777,10 +2693,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2817,7 +2732,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -2843,7 +2758,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) @@ -2867,30 +2782,21 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col2 (type: int) - mode: hash + Select Operator + expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 18 Data size: 1944 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 9 Data size: 972 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2959,10 +2865,9 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3075,7 +2980,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3117,29 +3022,20 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3200,10 +3096,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) @@ -3299,29 +3194,20 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -3368,7 +3254,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3806,9 +3692,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 3 <- Union 4 (CONTAINS) - Map 6 <- Union 4 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 5 <- Union 4 (SIMPLE_EDGE) + Map 5 <- Union 4 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3855,7 +3740,7 @@ STAGE PLANS: Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -3884,7 +3769,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3897,19 +3782,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE Union 4 Vertex: Union 4 @@ -3944,11 +3816,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3988,7 +3859,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: b @@ -4010,10 +3881,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col0 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -4052,10 +3923,10 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col2, _col1 + outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col2 (type: string), _col1 (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE @@ -4064,24 +3935,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4132,11 +3986,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4176,7 +4029,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -4193,7 +4046,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 9 + Map 8 Map Operator Tree: TableScan alias: p @@ -4218,7 +4071,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) @@ -4261,10 +4114,10 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col2 (type: int) - outputColumnNames: _col3, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col3 (type: string), _col2 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE @@ -4273,20 +4126,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4629,12 +4469,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4654,27 +4493,6 @@ STAGE PLANS: value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs - Map 10 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_size (type: int), p_type (type: string) - outputColumnNames: p_size, p_type - Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int), p_type (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs Map 3 Map Operator Tree: TableScan @@ -4695,7 +4513,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -4712,7 +4530,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 8 + Map 7 Map Operator Tree: TableScan alias: pp @@ -4732,7 +4550,28 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int), p_type (type: string) + outputColumnNames: p_size, p_type + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int), p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4755,7 +4594,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: string), _col3 (type: int) 1 _col0 (type: string), _col1 (type: string), _col2 (type: int) @@ -4781,30 +4620,21 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col6, _col7 Statistics: Num rows: 7 Data size: 1603 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col6 (type: string), _col7 (type: int) - mode: hash + Select Operator + expressions: _col0 (type: string), _col6 (type: string), _col7 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 7 Data size: 1603 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 3 Data size: 687 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4821,7 +4651,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 + Reducer 8 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -4860,11 +4690,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 5 <- Map 9 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4904,7 +4733,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -4921,7 +4750,7 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 9 + Map 8 Map Operator Tree: TableScan alias: p @@ -4946,7 +4775,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) @@ -4989,10 +4818,10 @@ STAGE PLANS: Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col2 (type: int) - outputColumnNames: _col3, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 16 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col3 (type: string), _col2 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE @@ -5001,20 +4830,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -6748,9 +6564,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -6796,7 +6611,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToLong(_col0) (type: bigint) 1 _col0 (type: bigint) @@ -6830,19 +6645,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index bbdce1d..6e33d07 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -89,9 +89,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -142,16 +141,20 @@ STAGE PLANS: Filter Operator predicate: p_brand is not null (type: boolean) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_brand (type: string) - mode: hash + Select Operator + expressions: p_brand (type: string) outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -176,7 +179,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -202,19 +205,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -931,7 +921,7 @@ POSTHOOK: query: select * from part_null where p_brand IN (select p_brand from p POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null) @@ -945,10 +935,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) - Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -1016,31 +1005,22 @@ STAGE PLANS: Filter Operator predicate: p_brand is not null (type: boolean) Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_brand (type: string) - mode: hash + Select Operator + expressions: p_brand (type: string) outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1087,15 +1067,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 3581 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 3581 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1137,7 +1117,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -2507,14 +2487,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2537,7 +2516,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 12 + Map 11 Map Operator Tree: TableScan alias: part @@ -2594,7 +2573,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 9 + Map 8 Map Operator Tree: TableScan alias: part_null @@ -2617,27 +2596,6 @@ STAGE PLANS: Reducer 10 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 17 Data size: 3581 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 17 Data size: 3581 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 17 Data size: 3581 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Execution mode: llap - Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial @@ -2648,7 +2606,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 1685 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2683,15 +2641,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col3 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 6165 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 6165 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2720,29 +2678,41 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 28 Data size: 5605 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash + Select Operator + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 5605 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 5605 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 5605 Basic stats: COMPLETE Column stats: NONE + Reducer 9 Execution mode: llap Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 2802 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 14 Data size: 2802 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 17 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 17 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 17 Data size: 3581 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4043,14 +4013,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4070,24 +4039,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 10 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 12 + Map 11 Map Operator Tree: TableScan alias: part @@ -4121,7 +4073,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: pp @@ -4138,7 +4090,24 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4151,7 +4120,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 13 + Reducer 12 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4169,7 +4138,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -4210,30 +4179,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col5 Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col5 (type: string) - mode: hash + Select Operator + expressions: _col0 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -4258,7 +4218,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4318,7 +4278,7 @@ POSTHOOK: Input: default@part_null 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -Warning: Shuffle Join MERGEJOIN[100][tables = [$hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[98][tables = [$hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -4338,19 +4298,18 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE) - Reducer 13 <- Map 15 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) - Reducer 14 <- Reducer 13 (SIMPLE_EDGE) - Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) - Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) + Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) + Reducer 19 <- Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 20 <- Reducer 19 (SIMPLE_EDGE) Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) - Reducer 8 <- Reducer 17 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Reducer 20 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) + Reducer 8 <- Reducer 16 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Reducer 19 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4373,7 +4332,7 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 11 + Map 10 Map Operator Tree: TableScan alias: lineitem @@ -4393,7 +4352,7 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 15 + Map 14 Map Operator Tree: TableScan alias: li @@ -4413,7 +4372,7 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Map 16 + Map 15 Map Operator Tree: TableScan alias: lineitem @@ -4433,7 +4392,7 @@ STAGE PLANS: value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 18 + Map 17 Map Operator Tree: TableScan alias: lineitem @@ -4493,20 +4452,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: double) Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 + Reducer 11 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4519,7 +4465,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 13 + Reducer 12 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -4540,7 +4486,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 14 + Reducer 13 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4553,7 +4499,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 17 + Reducer 16 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4570,7 +4516,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 19 + Reducer 18 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4588,20 +4534,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 20 + Reducer 19 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4619,6 +4552,19 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -4641,7 +4587,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) @@ -4706,10 +4652,10 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col4 - Statistics: Num rows: 14 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int), _col4 (type: int) + keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE @@ -4725,7 +4671,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[100][tables = [$hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[98][tables = [$hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 8' is a cross product PREHOOK: query: select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -4765,15 +4711,14 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (SIMPLE_EDGE) - Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) - Reducer 16 <- Reducer 15 (SIMPLE_EDGE) - Reducer 18 <- Map 17 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) + Reducer 15 <- Reducer 14 (SIMPLE_EDGE) + Reducer 17 <- Map 16 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) @@ -4815,7 +4760,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 14 + Map 13 Map Operator Tree: TableScan alias: s2 @@ -4831,7 +4776,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 17 + Map 16 Map Operator Tree: TableScan alias: s1 @@ -4921,30 +4866,21 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 13 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 15 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 34 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 14 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -4965,7 +4901,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 16 + Reducer 15 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -4978,7 +4914,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 214 Data size: 19474 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 18 + Reducer 17 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5037,7 +4973,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col2 (type: bigint) 1 _col0 (type: bigint) @@ -5419,7 +5355,7 @@ POSTHOOK: Input: default@src 431 val_431 3 430 val_430 3 417 val_417 3 -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1') PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1') @@ -5433,10 +5369,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5471,7 +5406,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -5496,7 +5431,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -5530,20 +5465,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 169 Data size: 38025 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 169 Data size: 38025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 169 Data size: 38025 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5562,7 +5484,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_name IN (select p_name from part p where part.p_type <> '1') PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 48fe336..8530a35 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -4794,7 +4794,7 @@ almond azure aquamarine papaya violet almond antique medium spring khaki almond aquamarine sandy cyan gainsboro almond antique olive coral navajo -Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY POSTHOOK: query: explain select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) @@ -4809,11 +4809,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 5 <- Reducer 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -4910,29 +4909,20 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -5003,7 +4993,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -5053,7 +5043,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -5114,10 +5104,9 @@ STAGE PLANS: Reducer 18 <- Map 17 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Reducer 20 <- Map 19 (SIMPLE_EDGE) - Reducer 21 <- Reducer 20 (SIMPLE_EDGE) Reducer 3 <- Reducer 14 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 5 <- Reducer 20 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### @@ -5380,29 +5369,20 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 21 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -5456,7 +5436,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index a1a74a7..34eee35 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -2637,9 +2637,9 @@ POSTHOOK: Input: default@part_null 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about -Warning: Shuffle Join MERGEJOIN[91][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[92][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 12' is a cross product -Warning: Shuffle Join MERGEJOIN[93][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[89][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[90][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[91][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 12' is a cross product PREHOOK: query: explain select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 @@ -2653,17 +2653,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Reducer 9 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE), Reducer 16 (CUSTOM_SIMPLE_EDGE) - Reducer 13 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 18 (CUSTOM_SIMPLE_EDGE) - Reducer 14 <- Reducer 13 (SIMPLE_EDGE) - Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) - Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Reducer 15 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 17 (CUSTOM_SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) + Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2684,7 +2683,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 11 + Map 10 Map Operator Tree: TableScan alias: part @@ -2699,7 +2698,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 15 + Map 14 Map Operator Tree: TableScan alias: part @@ -2719,7 +2718,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 17 + Map 16 Map Operator Tree: TableScan alias: part @@ -2796,24 +2795,7 @@ STAGE PLANS: value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Reducer 10 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 12 + Reducer 11 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2828,7 +2810,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Reducer 13 + Reducer 12 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2849,7 +2831,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 14 + Reducer 13 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2862,7 +2844,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 16 + Reducer 15 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2885,7 +2867,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 18 + Reducer 17 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2925,10 +2907,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col5 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2986,10 +2968,10 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: int) - outputColumnNames: _col2, _col1 + outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 1296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col2 (type: string), _col1 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE @@ -3005,9 +2987,9 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[91][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[92][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 12' is a cross product -Warning: Shuffle Join MERGEJOIN[93][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[89][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[90][tables = [$hdt$_4, $hdt$_5]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[91][tables = [$hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 12' is a cross product PREHOOK: query: select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5956,14 +5938,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 11 <- Map 10 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 6 <- Reducer 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 5 <- Reducer 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -5983,24 +5964,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 10 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 12 + Map 11 Map Operator Tree: TableScan alias: part @@ -6034,7 +5998,7 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: pp @@ -6051,7 +6015,24 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 10 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -6064,7 +6045,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 13 + Reducer 12 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -6082,7 +6063,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -6130,30 +6111,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col6 Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col6 (type: string) - mode: hash + Select Operator + expressions: _col0 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -6180,7 +6152,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 1728 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 9 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index d96a5a4..185f08e 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -129,20 +129,19 @@ STAGE PLANS: Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) Reducer 15 <- Map 14 (SIMPLE_EDGE) Reducer 17 <- Map 16 (SIMPLE_EDGE) - Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) + Reducer 19 <- Map 18 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) - Reducer 21 <- Reducer 20 (SIMPLE_EDGE) - Reducer 23 <- Map 22 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) - Reducer 24 <- Reducer 23 (SIMPLE_EDGE) - Reducer 26 <- Map 25 (SIMPLE_EDGE) - Reducer 28 <- Map 27 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) - Reducer 29 <- Reducer 28 (SIMPLE_EDGE) + Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) + Reducer 22 <- Map 21 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) + Reducer 23 <- Reducer 22 (SIMPLE_EDGE) + Reducer 25 <- Map 24 (SIMPLE_EDGE) + Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) + Reducer 28 <- Reducer 27 (SIMPLE_EDGE) + Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 30 <- Reducer 29 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) - Reducer 32 <- Map 31 (SIMPLE_EDGE) - Reducer 34 <- Map 33 (SIMPLE_EDGE) - Reducer 4 <- Reducer 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 31 <- Map 30 (SIMPLE_EDGE) + Reducer 33 <- Map 32 (SIMPLE_EDGE) + Reducer 4 <- Reducer 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (SIMPLE_EDGE) @@ -247,7 +246,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 22 + Map 21 Map Operator Tree: TableScan alias: a @@ -266,7 +265,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 25 + Map 24 Map Operator Tree: TableScan alias: b @@ -285,7 +284,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 27 + Map 26 Map Operator Tree: TableScan alias: a @@ -304,7 +303,7 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 31 + Map 30 Map Operator Tree: TableScan alias: b @@ -323,7 +322,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 33 + Map 32 Map Operator Tree: TableScan alias: b @@ -513,7 +512,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 8881 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash @@ -524,20 +523,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 21 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 23 + Reducer 22 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -564,7 +550,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 24 + Reducer 23 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -579,7 +565,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) - Reducer 26 + Reducer 25 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -592,7 +578,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 28 + Reducer 27 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -613,7 +599,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) Statistics: Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 29 + Reducer 28 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -631,6 +617,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: boolean) + Reducer 29 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col4 (type: string), _col2 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: boolean) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -655,24 +658,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 30 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ - Map-reduce partition columns: _col4 (type: string), _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 269 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: boolean) - Reducer 32 + Reducer 31 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -685,7 +671,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 34 + Reducer 33 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -703,7 +689,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index dfae461..7cbd4f6 100644 --- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -23,8 +23,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -89,14 +88,14 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: l_orderkey - Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: vectorOutput: false native: false projectedOutputColumns: null - keys: l_orderkey (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE @@ -148,17 +147,17 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinInnerBigOnlyLongOperator + className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col3 input vertices: - 1 Reducer 5 + 1 Map 4 Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) @@ -178,36 +177,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [] - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -265,10 +234,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 7 <- Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) Reducer 8 <- Map 4 (BROADCAST_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 2 (BROADCAST_EDGE), Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -452,16 +420,38 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col4 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col4 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiMultiKeyOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col3 (type: int) + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col3 + input vertices: + 1 Reducer 8 + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2] + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: @@ -527,79 +517,34 @@ STAGE PLANS: input vertices: 0 Map 4 Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 1, col 0 - native: false - projectedOutputColumns: [] - keys: _col0 (type: int), _col3 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0, col 1 - native: false - projectedOutputColumns: [] - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [2, 3] - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator + projectedOutputColumns: [1, 0] + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1, col 0 native: false - Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + projectedOutputColumns: [] + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 96cfd65..26a70ac 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -4121,10 +4121,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Union 9 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) + Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### @@ -4151,7 +4150,7 @@ STAGE PLANS: enabled: false enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - Map 11 + Map 10 Map Operator Tree: TableScan alias: srcpart @@ -4221,57 +4220,7 @@ STAGE PLANS: enabled: false enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - Reducer 10 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ds (string) - Target Input: srcpart - Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 - Reducer 12 + Reducer 11 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -4299,6 +4248,36 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 5 Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -4324,7 +4303,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -4385,6 +4364,36 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 5 Union 3 Vertex: Union 3 Union 9 diff --git a/ql/src/test/results/clientpositive/masking_3.q.out b/ql/src/test/results/clientpositive/masking_3.q.out index 2d8a79e..754cff0 100644 --- a/ql/src/test/results/clientpositive/masking_3.q.out +++ b/ql/src/test/results/clientpositive/masking_3.q.out @@ -15,14 +15,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -99,28 +98,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -142,19 +119,19 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -681,14 +658,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -765,28 +741,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -811,19 +765,19 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1347,14 +1301,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select key from masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1431,28 +1384,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -1476,19 +1407,19 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2012,14 +1943,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select value from masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2096,28 +2026,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -2142,23 +2050,23 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2682,15 +2590,14 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq join srcpart on (masking_test_subq.key = srcpart.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-3 depends on stages: Stage-5 - Stage-4 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-4 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3 Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -2767,28 +2674,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -2813,16 +2698,16 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2838,7 +2723,7 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) TableScan alias: srcpart @@ -7016,14 +6901,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from default.masking_test_subq where key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -7100,28 +6984,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -7146,19 +7008,19 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7682,14 +7544,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from masking_test_subq where masking_test_subq.key > 0 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -7766,28 +7627,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -7812,19 +7651,19 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/masking_4.q.out b/ql/src/test/results/clientpositive/masking_4.q.out index 9ddba3a..36889e6 100644 --- a/ql/src/test/results/clientpositive/masking_4.q.out +++ b/ql/src/test/results/clientpositive/masking_4.q.out @@ -153,14 +153,13 @@ with q1 as ( select * from masking_test where key = '5') select * from masking_test_subq POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -237,28 +236,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -280,19 +257,19 @@ STAGE PLANS: key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double), _col0 (type: int) 1 UDFToDouble(_col0) (type: double), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out index d0900a8..bf90cdd 100644 --- a/ql/src/test/results/clientpositive/perf/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/query70.q.out @@ -75,14 +75,13 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) Reducer 13 <- Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 4 <- Reducer 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) @@ -92,133 +91,128 @@ Stage-0 limit:100 Stage-1 Reducer 7 - File Output Operator [FS_64] - Limit [LIM_63] (rows=100 width=88) + File Output Operator [FS_62] + Limit [LIM_61] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_62] (rows=1149975358 width=88) + Select Operator [SEL_60] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_61] - Select Operator [SEL_59] (rows=1149975358 width=88) + SHUFFLE [RS_59] + Select Operator [SEL_57] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_58] (rows=1149975358 width=88) + PTF Operator [PTF_56] (rows=1149975358 width=88) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 DESC NULLS LAST","partition by:":"(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END"}] - Select Operator [SEL_57] (rows=1149975358 width=88) + Select Operator [SEL_55] (rows=1149975358 width=88) Output:["_col0","_col1","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_56] + SHUFFLE [RS_54] PartitionCols:(grouping(_col5, 1) + grouping(_col5, 0)), CASE WHEN ((UDFToInteger(grouping(_col5, 0)) = 0)) THEN (_col0) ELSE (null) END - Select Operator [SEL_55] (rows=1149975358 width=88) + Select Operator [SEL_53] (rows=1149975358 width=88) Output:["_col0","_col1","_col4","_col5"] - Group By Operator [GBY_54] (rows=1149975358 width=88) + Group By Operator [GBY_52] (rows=1149975358 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_53] + SHUFFLE [RS_51] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_52] (rows=2299950717 width=88) + Group By Operator [GBY_50] (rows=2299950717 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col0, _col1, 0 - Select Operator [SEL_50] (rows=766650239 width=88) + Select Operator [SEL_48] (rows=766650239 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_92] (rows=766650239 width=88) - Conds:RS_47._col7=RS_48._col0(Inner),Output:["_col2","_col6","_col7"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_48] + Merge Join Operator [MERGEJOIN_90] (rows=766650239 width=88) + Conds:RS_45._col7=RS_46._col0(Left Semi),Output:["_col2","_col6","_col7"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_46] PartitionCols:_col0 - Group By Operator [GBY_39] (rows=58079562 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col0 - Group By Operator [GBY_37] (rows=116159124 width=88) - Output:["_col0"],keys:_col0 - Select Operator [SEL_32] (rows=116159124 width=88) - Output:["_col0"] - Filter Operator [FIL_84] (rows=116159124 width=88) - predicate:(rank_window_0 <= 5) - PTF Operator [PTF_31] (rows=348477374 width=88) - Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] - Select Operator [SEL_30] (rows=348477374 width=88) - Output:["_col0","_col1"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_29] + Group By Operator [GBY_44] (rows=116159124 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_32] (rows=116159124 width=88) + Output:["_col0"] + Filter Operator [FIL_82] (rows=116159124 width=88) + predicate:(rank_window_0 <= 5) + PTF Operator [PTF_31] (rows=348477374 width=88) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 DESC NULLS LAST","partition by:":"_col0"}] + Select Operator [SEL_30] (rows=348477374 width=88) + Output:["_col0","_col1"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_27] (rows=348477374 width=88) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col0 - Group By Operator [GBY_27] (rows=348477374 width=88) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Group By Operator [GBY_25] (rows=696954748 width=88) - Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 - Select Operator [SEL_24] (rows=696954748 width=88) - Output:["_col6","_col2"] - Merge Join Operator [MERGEJOIN_91] (rows=696954748 width=88) - Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col6"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_22] + Group By Operator [GBY_25] (rows=696954748 width=88) + Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 + Select Operator [SEL_24] (rows=696954748 width=88) + Output:["_col6","_col2"] + Merge Join Operator [MERGEJOIN_89] (rows=696954748 width=88) + Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col6"] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_85] (rows=1704 width=1910) + predicate:(s_store_sk is not null and s_state is not null) + TableScan [TS_15] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_88] (rows=633595212 width=88) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_83] (rows=575995635 width=88) + predicate:(ss_store_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_87] (rows=1704 width=1910) - predicate:(s_store_sk is not null and s_state is not null) - TableScan [TS_15] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_90] (rows=633595212 width=88) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_85] (rows=575995635 width=88) - predicate:(ss_store_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_9] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_86] (rows=8116 width=1119) - predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] + Select Operator [SEL_14] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_84] (rows=8116 width=1119) + predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_47] + SHUFFLE [RS_45] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_89] (rows=696954748 width=88) - Conds:RS_44._col1=RS_45._col0(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_87] (rows=696954748 width=88) + Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col2","_col6","_col7"] <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_45] + SHUFFLE [RS_41] PartitionCols:_col0 Select Operator [SEL_8] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_83] (rows=1704 width=1910) + Filter Operator [FIL_81] (rows=1704 width=1910) predicate:(s_state is not null and s_store_sk is not null) TableScan [TS_6] (rows=1704 width=1910) default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_county","s_state"] <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_44] + SHUFFLE [RS_40] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_88] (rows=633595212 width=88) - Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_86] (rows=633595212 width=88) + Conds:RS_37._col0=RS_38._col0(Inner),Output:["_col1","_col2"] <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_41] + SHUFFLE [RS_37] PartitionCols:_col0 Select Operator [SEL_2] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_81] (rows=575995635 width=88) + Filter Operator [FIL_79] (rows=575995635 width=88) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_42] + SHUFFLE [RS_38] PartitionCols:_col0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_82] (rows=8116 width=1119) + Filter Operator [FIL_80] (rows=8116 width=1119) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out b/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out index a11bbc4..06df989 100644 --- a/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/leftsemijoin.q.out @@ -108,3 +108,200 @@ POSTHOOK: query: drop table things POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@things POSTHOOK: Output: default@things +Warning: Shuffle Join JOIN[10][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 743 Data size: 93722 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0 + Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 676 Data size: 85202 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[10][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select part.p_type from part join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +ECONOMY BRUSHED COPPER +ECONOMY BURNISHED STEEL +ECONOMY PLATED COPPER +ECONOMY POLISHED STEEL +LARGE BRUSHED BRASS +LARGE BRUSHED STEEL +LARGE BURNISHED STEEL +MEDIUM ANODIZED COPPER +MEDIUM BURNISHED BRASS +MEDIUM BURNISHED COPPER +MEDIUM BURNISHED TIN +MEDIUM BURNISHED TIN +PROMO ANODIZED TIN +PROMO BURNISHED NICKEL +PROMO PLATED TIN +PROMO PLATED TIN +PROMO POLISHED STEEL +SMALL BRUSHED BRASS +SMALL PLATED BRASS +SMALL PLATED STEEL +SMALL POLISHED NICKEL +STANDARD ANODIZED STEEL +STANDARD ANODIZED TIN +STANDARD BURNISHED TIN +STANDARD PLATED TIN +STANDARD POLISHED STEEL +PREHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +POSTHOOK: query: explain select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: part + Select Operator + expressions: p_type (type: string) + outputColumnNames: _col0 + ListSink + +PREHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select part.p_type from part left join (select p1.p_name from part p1, part p2 group by p1.p_name) pp ON pp.p_name = part.p_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +ECONOMY BRUSHED COPPER +ECONOMY BURNISHED STEEL +ECONOMY PLATED COPPER +ECONOMY POLISHED STEEL +LARGE BRUSHED BRASS +LARGE BRUSHED STEEL +LARGE BURNISHED STEEL +MEDIUM ANODIZED COPPER +MEDIUM BURNISHED BRASS +MEDIUM BURNISHED COPPER +MEDIUM BURNISHED TIN +MEDIUM BURNISHED TIN +PROMO ANODIZED TIN +PROMO BURNISHED NICKEL +PROMO PLATED TIN +PROMO PLATED TIN +PROMO POLISHED STEEL +SMALL BRUSHED BRASS +SMALL PLATED BRASS +SMALL PLATED STEEL +SMALL POLISHED NICKEL +STANDARD ANODIZED STEEL +STANDARD ANODIZED TIN +STANDARD BURNISHED TIN +STANDARD PLATED TIN +STANDARD POLISHED STEEL diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out index 4c1560d..bc25efe 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -24,10 +24,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +60,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 5 Map Operator Tree: TableScan alias: b @@ -84,7 +83,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -107,29 +106,21 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string), _col3 (type: string) - mode: hash + Select Operator + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -283,10 +274,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -318,7 +308,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 5 Map Operator Tree: TableScan alias: b @@ -341,15 +331,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -364,29 +354,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 6cc7fa7..0fde046 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -16,8 +16,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -46,21 +45,25 @@ STAGE PLANS: Filter Operator predicate: (key > '9') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash + Select Operator + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -73,18 +76,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -141,10 +132,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -179,7 +169,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: b @@ -202,7 +192,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -225,29 +215,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash + Select Operator + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -323,10 +305,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2) Reducer 5 <- Reducer 4 (GROUP, 1) - Reducer 6 <- Reducer 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -362,7 +343,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) @@ -438,18 +419,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -748,7 +717,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: @@ -807,7 +776,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -849,14 +818,14 @@ STAGE PLANS: Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) - mode: complete + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Reducer 7 Reduce Operator Tree: Group By Operator @@ -949,8 +918,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 2) Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 6 (GROUP, 2) + Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -999,10 +967,10 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: l_orderkey + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: l_orderkey (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE @@ -1043,7 +1011,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -1060,18 +1028,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1385,7 +1341,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: @@ -1445,10 +1401,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col5 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1489,24 +1445,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Reducer 7 Reduce Operator Tree: Group By Operator @@ -2301,10 +2249,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2337,7 +2284,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col2 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -2360,15 +2307,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2385,10 +2332,10 @@ STAGE PLANS: Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: int), _col1 (type: string), _col4 (type: int) - outputColumnNames: _col3, _col1, _col4 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col3 (type: int), _col1 (type: string), _col4 (type: int) + keys: _col0 (type: int), _col1 (type: string), _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE @@ -2397,23 +2344,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: int) @@ -2478,10 +2409,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2514,7 +2444,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -2537,15 +2467,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2560,29 +2490,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col2 (type: string) - mode: hash + Select Operator + expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -2647,10 +2569,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2683,7 +2604,7 @@ STAGE PLANS: Map-reduce partition columns: (_col1 + 1) (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -2706,15 +2627,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2729,29 +2650,21 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: int) - mode: hash + Select Operator + expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -2818,10 +2731,9 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (GROUP, 2) Reducer 8 <- Map 7 (GROUP, 2) - Reducer 9 <- Reducer 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2925,7 +2837,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -2965,28 +2877,20 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reducer 9 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3046,10 +2950,9 @@ STAGE PLANS: Spark Edges: Reducer 11 <- Map 10 (GROUP, 2) - Reducer 12 <- Reducer 11 (GROUP, 2) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) Reducer 7 <- Reducer 6 (GROUP, 2) Reducer 9 <- Map 8 (GROUP, 2) @@ -3140,28 +3043,20 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reducer 12 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -3205,7 +3100,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3620,8 +3515,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2), Map 5 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3664,7 +3558,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 52 Data size: 6294 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 4 Map Operator Tree: TableScan alias: part @@ -3690,7 +3584,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3703,18 +3597,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3746,11 +3628,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3786,7 +3667,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 7 + Map 6 Map Operator Tree: TableScan alias: b @@ -3809,10 +3690,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col0 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -3849,10 +3730,10 @@ STAGE PLANS: Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col1 (type: string) - outputColumnNames: _col2, _col1 + outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col2 (type: string), _col1 (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE @@ -3861,23 +3742,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -3926,11 +3791,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3966,7 +3830,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -3985,7 +3849,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 9 + Map 8 Map Operator Tree: TableScan alias: p @@ -4007,15 +3871,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4048,10 +3912,10 @@ STAGE PLANS: Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col2 (type: int) - outputColumnNames: _col3, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col3 (type: string), _col2 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE @@ -4060,19 +3924,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -4384,12 +4236,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 2) - Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 11 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Map 9 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -4407,25 +4258,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col3 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) - Map 10 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int), p_type (type: string) - outputColumnNames: p_size, p_type - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_size (type: int), p_type (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan @@ -4444,7 +4276,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: part @@ -4463,7 +4295,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 7 Map Operator Tree: TableScan alias: pp @@ -4481,7 +4313,26 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_size (type: int), p_type (type: string) + outputColumnNames: p_size, p_type + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int), p_type (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 10 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: string) @@ -4502,15 +4353,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: string), _col3 (type: int) 1 _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 33 Data size: 4118 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 67 Data size: 8375 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 33 Data size: 4118 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 67 Data size: 8375 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4527,29 +4378,21 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col6, _col7 Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col6 (type: string), _col7 (type: int) - mode: hash + Select Operator + expressions: _col0 (type: string), _col6 (type: string), _col7 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3744 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 30 Data size: 3744 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 61 Data size: 7614 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: int) @@ -4565,7 +4408,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 9 + Reducer 8 Reduce Operator Tree: Join Operator condition map: @@ -4602,11 +4445,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Reducer 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -4642,7 +4484,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -4661,7 +4503,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 9 + Map 8 Map Operator Tree: TableScan alias: p @@ -4683,15 +4525,15 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4724,10 +4566,10 @@ STAGE PLANS: Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: string), _col2 (type: int) - outputColumnNames: _col3, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col3 (type: string), _col2 (type: int) + keys: _col0 (type: string), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE @@ -4736,19 +4578,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -6258,9 +6088,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Map 3 (GROUP, 1) - Reducer 5 <- Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -6301,7 +6130,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToLong(_col0) (type: bigint) 1 _col0 (type: bigint) @@ -6334,18 +6163,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 5b3ee56..f071824 100644 --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -22,8 +22,6 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark - Edges: - Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 3 @@ -58,56 +56,27 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: l_orderkey + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: vectorOutput: false native: false projectedOutputColumns: null - keys: l_orderkey (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) Map Vectorization: enabled: false enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - Reducer 5 - Execution mode: vectorized Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0 - native: false - projectedOutputColumns: [] - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) Stage: Stage-1 Spark @@ -180,17 +149,17 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - className: VectorMapJoinInnerBigOnlyLongOperator + className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col3 input vertices: - 1 Reducer 5 + 1 Map 4 Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col3 (type: int) @@ -296,7 +265,6 @@ STAGE PLANS: Spark Edges: Reducer 8 <- Map 7 (GROUP, 2) - Reducer 9 <- Reducer 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 3 @@ -400,57 +368,32 @@ STAGE PLANS: input vertices: 0 Map 4 Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 1, col 0 - native: false - projectedOutputColumns: [] - keys: _col0 (type: int), _col3 (type: int) - mode: hash + Select Operator + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + vectorOutput: true + keyExpressions: col 1, col 0 native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: Uniform Hash IS false + projectedOutputColumns: [] + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE - Reducer 9 - Execution mode: vectorized - Local Work: - Map Reduce Local Work - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 0, col 1 - native: false - projectedOutputColumns: [] - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - Spark Hash Table Sink Vectorization: - className: VectorSparkHashTableSinkOperator - native: true - keys: - 0 _col1 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) + Spark HashTable Sink Operator + Spark Hash Table Sink Vectorization: + className: VectorSparkHashTableSinkOperator + native: true + keys: + 0 _col1 (type: int), _col4 (type: int) + 1 _col0 (type: int), _col1 (type: int) Stage: Stage-1 Spark @@ -523,18 +466,18 @@ STAGE PLANS: Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) Map Join Vectorization: - className: VectorMapJoinInnerBigOnlyMultiKeyOperator + className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col3 input vertices: - 1 Reducer 9 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + 1 Reducer 8 + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 @@ -542,13 +485,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2] - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/subq_where_serialization.q.out b/ql/src/test/results/clientpositive/subq_where_serialization.q.out index 7feb0c7..f689651 100644 --- a/ql/src/test/results/clientpositive/subq_where_serialization.q.out +++ b/ql/src/test/results/clientpositive/subq_where_serialization.q.out @@ -4,9 +4,11 @@ POSTHOOK: query: explain select src.key from src where src.key in ( select disti POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-2 , consists of Stage-5, Stage-1 + Stage-5 has a backup stage: Stage-1 + Stage-3 depends on stages: Stage-5 + Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1 STAGE PLANS: Stage: Stage-2 @@ -36,9 +38,9 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) - mode: complete + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -47,13 +49,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 + Conditional Operator + + Stage: Stage-5 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:src + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:src + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -64,32 +78,64 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator + Map Join Operator + condition map: + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out index a310e49..412c579 100644 --- a/ql/src/test/results/clientpositive/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists.q.out @@ -17,14 +17,13 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -90,39 +89,21 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col2, _col3 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string), _col3 (type: string) - mode: hash + Select Operator + expressions: _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -144,11 +125,11 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -296,14 +277,13 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -366,39 +346,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash + Select Operator + expressions: _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -421,19 +383,19 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/subquery_exists_having.q.out b/ql/src/test/results/clientpositive/subquery_exists_having.q.out index 96263bf..51c98ed 100644 --- a/ql/src/test/results/clientpositive/subquery_exists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -20,10 +20,9 @@ having exists POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-5 is a root stage - Stage-3 depends on stages: Stage-5 - Stage-4 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -78,11 +77,11 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -96,7 +95,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -167,39 +166,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: hash + Select Operator + expressions: _col2 (type: string) outputColumnNames: _col0 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -324,10 +305,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -360,30 +341,27 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Mux Operator - Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + Mux Operator + Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator Statistics: Num rows: 1457 Data size: 15478 Basic stats: COMPLETE Column stats: NONE Join Operator @@ -394,30 +372,27 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col2 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Mux Operator - Statistics: Num rows: 584 Data size: 6193 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + Mux Operator + Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out index 8cd1208..2d44a15 100644 --- a/ql/src/test/results/clientpositive/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -52,9 +52,8 @@ having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.k POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 + Stage-2 depends on stages: Stage-1, Stage-3 Stage-3 is a root stage - Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -112,11 +111,11 @@ STAGE PLANS: key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -165,39 +164,21 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -343,11 +324,11 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -398,9 +379,9 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) - mode: complete + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -490,10 +471,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 30 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -519,28 +500,23 @@ STAGE PLANS: expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 1057 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 528 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 35 Data size: 3701 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + Mux Operator + Statistics: Num rows: 40 Data size: 4230 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -728,47 +704,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -805,17 +755,21 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -870,9 +824,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-5 + Stage-7 depends on stages: Stage-2, Stage-5 , consists of Stage-8, Stage-3 + Stage-8 has a backup stage: Stage-3 + Stage-6 depends on stages: Stage-8 + Stage-3 Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-6, Stage-3 STAGE PLANS: Stage: Stage-1 @@ -990,6 +947,45 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1007,47 +1003,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -1084,17 +1054,21 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1119,9 +1093,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage Stage-1 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-1, Stage-5 + Stage-7 depends on stages: Stage-1, Stage-5 , consists of Stage-8, Stage-2 + Stage-8 has a backup stage: Stage-2 + Stage-6 depends on stages: Stage-8 + Stage-2 Stage-5 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-6, Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1292,6 +1269,45 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-7 + Conditional Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Stage: Stage-2 Map Reduce Map Operator Tree: @@ -1309,47 +1325,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 84 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Group By Operator - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 42 Data size: 440 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 126 Data size: 1321 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Map Reduce @@ -1386,17 +1376,21 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1735,9 +1729,12 @@ having p_name in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 + Stage-5 depends on stages: Stage-1, Stage-3 , consists of Stage-6, Stage-2 + Stage-6 has a backup stage: Stage-2 + Stage-4 depends on stages: Stage-6 + Stage-2 Stage-3 is a root stage - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-4, Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1779,6 +1776,45 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-5 + Conditional Operator + + Stage: Stage-6 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Stage: Stage-2 Map Reduce Map Operator Tree: @@ -1796,47 +1832,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 3173 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 22 Data size: 4653 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 2326 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 33 Data size: 6979 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 3490 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Map Reduce diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index d46613b..bdfdc34 100644 --- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -39,14 +39,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from src11 where src11.key1 in (select key from src where src11.value1 = value and key > '9') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -113,39 +112,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash + Select Operator + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -167,19 +148,19 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -196,14 +177,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from src a where a.key in (select key from src where a.value = value and key > '9') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-2 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -270,39 +250,21 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col2 (type: string) - mode: hash + Select Operator + expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Map Reduce @@ -324,11 +286,11 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -470,9 +432,9 @@ STAGE PLANS: Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) - mode: complete + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -500,11 +462,11 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index bf21796..5ea9a65 100644 --- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -16,16 +16,9 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-12 depends on stages: Stage-1 - Stage-9 depends on stages: Stage-12 - Stage-8 depends on stages: Stage-5, Stage-9 , consists of Stage-10, Stage-11, Stage-3 - Stage-10 has a backup stage: Stage-3 - Stage-6 depends on stages: Stage-10 - Stage-11 has a backup stage: Stage-3 - Stage-7 depends on stages: Stage-11 - Stage-3 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-6, Stage-7, Stage-3 + Stage-8 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-1 @@ -76,12 +69,15 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-12 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:li Fetch Operator limit: -1 + $hdt$_2:lineitem + Fetch Operator + limit: -1 Alias -> Map Local Operator Tree: $hdt$_1:li TableScan @@ -98,163 +94,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col1 (type: int) - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-8 - Conditional Operator - - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: + $hdt$_2:lineitem TableScan alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE @@ -263,46 +103,59 @@ STAGE PLANS: Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int) - outputColumnNames: l_orderkey + outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: vectorOutput: false native: false projectedOutputColumns: null - keys: l_orderkey (type: int) + keys: _col0 (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map Vectorization: enabled: false enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - vectorOutput: false - native: false - projectedOutputColumns: null - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + Local Work: + Map Reduce Local Work Stage: Stage-0 Fetch Operator @@ -352,20 +205,18 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-19 depends on stages: Stage-1 - Stage-14 depends on stages: Stage-19 - Stage-13 depends on stages: Stage-6, Stage-14 , consists of Stage-17, Stage-18, Stage-3 - Stage-17 has a backup stage: Stage-3 - Stage-11 depends on stages: Stage-17 - Stage-18 has a backup stage: Stage-3 - Stage-12 depends on stages: Stage-18 + Stage-16 depends on stages: Stage-1 + Stage-12 depends on stages: Stage-16 + Stage-11 depends on stages: Stage-12, Stage-13 , consists of Stage-15, Stage-3 + Stage-15 has a backup stage: Stage-3 + Stage-10 depends on stages: Stage-15 Stage-3 - Stage-7 is a root stage - Stage-21 depends on stages: Stage-7 - Stage-9 depends on stages: Stage-21 - Stage-20 depends on stages: Stage-9 - Stage-6 depends on stages: Stage-20 - Stage-0 depends on stages: Stage-11, Stage-12, Stage-3 + Stage-6 is a root stage + Stage-18 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-18 + Stage-17 depends on stages: Stage-8 + Stage-13 depends on stages: Stage-17 + Stage-0 depends on stages: Stage-10, Stage-3 STAGE PLANS: Stage: Stage-1 @@ -416,7 +267,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-19 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:li @@ -439,7 +290,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) - Stage: Stage-14 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -464,10 +315,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-13 + Stage: Stage-11 Conditional Operator - Stage: Stage-17 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -481,69 +332,25 @@ STAGE PLANS: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Map Vectorization: - enabled: false - enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - Local Work: - Map Reduce Local Work - - Stage: Stage-18 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - - Stage: Stage-12 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -570,29 +377,29 @@ STAGE PLANS: key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 66 Data size: 7983 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -640,7 +447,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-21 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_2:$hdt$_3:$hdt$_4:li @@ -663,7 +470,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-9 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -716,7 +523,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-20 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_2:$hdt$_2:lineitem @@ -739,7 +546,7 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) - Stage: Stage-6 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -751,46 +558,31 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col3 Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - vectorOutput: false - native: false - projectedOutputColumns: null - keys: _col0 (type: int), _col3 (type: int) - mode: hash + Select Operator + expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Group By Operator + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 60 Data size: 7258 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Map Vectorization: enabled: false enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat Local Work: Map Reduce Local Work - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - Group By Vectorization: - vectorOutput: false - native: false - projectedOutputColumns: null - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 3629 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator