diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index e1665fd..09c667e 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -642,7 +642,6 @@ spark.query.files=add_part_multiple.q, \ join35.q, \ join36.q, \ join37.q, \ - join38.q, \ join39.q, \ join40.q, \ join41.q, \ @@ -662,12 +661,10 @@ spark.query.files=add_part_multiple.q, \ join_filters.q, \ join_filters_overlap.q, \ join_hive_626.q, \ - join_literals.q, \ join_map_ppr.q, \ join_merge_multi_expressions.q, \ join_merging.q, \ join_nulls.q, \ - join_nullsafe.q, \ join_rc.q, \ join_reorder.q, \ join_reorder2.q, \ @@ -739,10 +736,8 @@ spark.query.files=add_part_multiple.q, \ ppd_join.q, \ ppd_join2.q, \ ppd_join3.q, \ - ppd_join4.q, \ ppd_join5.q, \ ppd_join_filter.q, \ - ppd_multi_insert.q, \ ppd_outer_join1.q, \ ppd_outer_join2.q, \ ppd_outer_join3.q, \ @@ -853,7 +848,6 @@ spark.query.files=add_part_multiple.q, \ stats9.q, \ statsfs.q, \ subquery_exists.q, \ - subquery_in.q, \ subquery_multiinsert.q, \ table_access_keys_stats.q, \ temp_table.q, \ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java index 819eef1..8b78123 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java @@ -73,15 +73,15 @@ ParseContext parseContext = context.getParseContext(); JoinOperator joinOp = (JoinOperator) nd; - /* - if (!conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) - && !(conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN))) { + + if (!conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) { + // && !(conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN))) { // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. - int pos = 0; // it doesn't matter which position we use in this case. - convertJoinSMBJoin(joinOp, context, pos, 0, false, false); + // int pos = 0; // it doesn't matter which position we use in this case. + // convertJoinSMBJoin(joinOp, context, pos, 0, false, false); return null; - }*/ + } // if we have traits, and table info is present in the traits, we know the // exact number of buckets. Else choose the largest number of estimated diff --git ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index ce8e751..b8a0dd7 100644 --- ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -155,35 +155,14 @@ POSTHOOK: query: -- number of rows explain select * from emp e join dept d on (e.deptid = d.deptid) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptname} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -194,32 +173,47 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - input vertices: - 1 Map 2 - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: deptname (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7954 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -236,35 +230,14 @@ POSTHOOK: query: -- 2 relations, 2 attributes explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: dept - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 - keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -275,35 +248,49 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - input vertices: - 1 Map 2 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean) - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((_col1 = _col6) and (_col0 = _col7)) (type: boolean) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -316,35 +303,14 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 - keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -355,32 +321,46 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - input vertices: - 1 Map 2 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -397,35 +377,14 @@ POSTHOOK: query: -- 2 relations, 3 attributes explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: dept - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and deptname is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 - keys: - 0 deptid (type: int), lastname (type: string), lastname (type: string) - 1 deptid (type: int), deptname (type: string), deptname (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -436,35 +395,49 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and lastname is not null) (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - keys: - 0 deptid (type: int), lastname (type: string), lastname (type: string) - 1 deptid (type: int), deptname (type: string), deptname (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7 - input vertices: - 1 Map 2 - Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean) - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string), lastname (type: string) + sort order: +++ + Map-reduce partition columns: deptid (type: int), lastname (type: string), lastname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: dept + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and deptname is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string), deptname (type: string) + sort order: +++ + Map-reduce partition columns: deptid (type: int), deptname (type: string), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7 + Statistics: Num rows: 11 Data size: 2134 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col1 = _col6) and (_col0 = _col7)) and (_col7 = _col0)) (type: boolean) + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 194 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -481,96 +454,81 @@ POSTHOOK: query: -- 3 relations, 1 attribute explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptname} - 2 {lastname} {deptid} {locid} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - 2 deptid (type: int) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) Map 3 Map Operator Tree: TableScan - alias: e1 - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + alias: d + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - 2 {lastname} {locid} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - 2 deptid (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: deptname (type: string) + Map 4 Map Operator Tree: TableScan - alias: e + alias: e1 Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - 2 {lastname} {deptid} {locid} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - 2 deptid (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13 + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 658 Data size: 192794 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -585,16 +543,31 @@ POSTHOOK: query: -- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47 explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: deptid is not null (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: lastname (type: string), locid (type: int) + Map 3 Map Operator Tree: TableScan alias: d @@ -602,18 +575,13 @@ STAGE PLANS: Filter Operator predicate: deptid is not null (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptname} - 2 {state} {locid} {zip} {year} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - 2 locid (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: deptid (type: int) + sort order: + + Map-reduce partition columns: deptid (type: int) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: deptname (type: string) + Map 4 Map Operator Tree: TableScan alias: l @@ -621,60 +589,35 @@ STAGE PLANS: Filter Operator predicate: locid is not null (type: boolean) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - 2 {state} {zip} {year} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - 2 locid (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: e - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: deptid is not null (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - 2 {state} {locid} {zip} {year} - keys: - 0 deptid (type: int) - 1 deptid (type: int) - 2 locid (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: locid (type: int) + sort order: + + Map-reduce partition columns: locid (type: int) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: state (type: string), zip (type: bigint), year (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -691,16 +634,31 @@ POSTHOOK: query: -- 3 relations and 2 attribute explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (deptid is not null and lastname is not null) (type: boolean) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: deptid (type: int), lastname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), lastname (type: string) + Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: locid (type: int) + Map 3 Map Operator Tree: TableScan alias: d @@ -708,18 +666,12 @@ STAGE PLANS: Filter Operator predicate: (deptid is not null and deptname is not null) (type: boolean) Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 - 2 {state} {locid} {zip} {year} - keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - 2 locid (type: int), state (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: deptid (type: int), deptname (type: string) + sort order: ++ + Map-reduce partition columns: deptid (type: int), deptname (type: string) + Statistics: Num rows: 6 Data size: 570 Basic stats: COMPLETE Column stats: COMPLETE + Map 4 Map Operator Tree: TableScan alias: l @@ -727,60 +679,35 @@ STAGE PLANS: Filter Operator predicate: (locid is not null and state is not null) (type: boolean) Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - 2 {zip} {year} - keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - 2 locid (type: int), state (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: e - Statistics: Num rows: 48 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (deptid is not null and lastname is not null) (type: boolean) - Statistics: Num rows: 48 Data size: 4752 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {lastname} {deptid} {locid} - 1 {deptid} {deptname} - 2 {state} {locid} {zip} {year} - keys: - 0 deptid (type: int), lastname (type: string) - 1 deptid (type: int), deptname (type: string) - 2 locid (type: int), state (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: locid (type: int), state (type: string) + sort order: ++ + Map-reduce partition columns: locid (type: int), state (type: string) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: zip (type: bigint), year (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 2 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col6 (type: int), _col7 (type: string), _col11 (type: string), _col12 (type: int), _col13 (type: bigint), _col14 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index ea77e79..7f33e1b 100644 --- ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -168,36 +168,37 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 3), Reducer 3 (PARTITION-LEVEL SORT, 3) + Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 3), Reducer 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: dim_pay_date + alias: orderpayment Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: date is not null (type: boolean) + predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {dealid} {cityid} {userid} - 1 - keys: - 0 date (type: string) - 1 date (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: date (type: string) + sort order: + + Map-reduce partition columns: date (type: string) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: dealid (type: int), cityid (type: int), userid (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -246,27 +247,24 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [dim_pay_date] - Map 3 + /orderpayment_small [orderpayment] + Map 6 Map Operator Tree: TableScan - alias: deal + alias: dim_pay_date Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: dealid is not null (type: boolean) + predicate: date is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col3} {_col4} {_col9} - 1 - keys: - 0 _col0 (type: int) - 1 dealid (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: date (type: string) + sort order: + + Map-reduce partition columns: date (type: string) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -315,27 +313,24 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [deal] - Map 4 + /orderpayment_small [dim_pay_date] + Map 7 Map Operator Tree: TableScan - alias: order_city + alias: deal Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: cityid is not null (type: boolean) + predicate: dealid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col4} {_col9} {_col16} - 1 - keys: - 0 _col3 (type: int) - 1 cityid (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: dealid (type: int) + sort order: + + Map-reduce partition columns: dealid (type: int) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -384,74 +379,24 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [order_city] - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /orderpayment_small [deal] + Map 8 Map Operator Tree: TableScan - alias: orderpayment + alias: order_city Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) + predicate: cityid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {dealid} {cityid} {userid} - 1 {date} - keys: - 0 date (type: string) - 1 date (type: string) - outputColumnNames: _col0, _col3, _col4, _col9 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col3} {_col4} {_col9} - 1 {dealid} - keys: - 0 _col0 (type: int) - 1 dealid (type: int) - outputColumnNames: _col3, _col4, _col9, _col16 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col4} {_col9} {_col16} - 1 - keys: - 0 _col3 (type: int) - 1 cityid (type: int) - outputColumnNames: _col4, _col9, _col16 - input vertices: - 1 Map 4 - Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col9} {_col16} - 1 - keys: - 0 _col4 (type: int) - 1 userid (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: cityid (type: int) + sort order: + + Map-reduce partition columns: cityid (type: int) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -500,13 +445,8 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [orderpayment] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + /orderpayment_small [order_city] + Map 9 Map Operator Tree: TableScan alias: user @@ -516,50 +456,13 @@ STAGE PLANS: isSamplingPred: false predicate: userid is not null (type: boolean) Statistics: Num rows: 50 Data size: 144 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col9} {_col16} - 1 - keys: - 0 _col4 (type: int) - 1 userid (type: int) - outputColumnNames: _col9, _col16 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col9 (type: string), _col16 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: userid (type: int) + sort order: + + Map-reduce partition columns: userid (type: int) + Statistics: Num rows: 50 Data size: 144 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -609,6 +512,102 @@ STAGE PLANS: name: default.user_small Truncated Path -> Alias: /user_small [user] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col2} {VALUE._col3} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col3, _col4, _col9 + Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col3 (type: int), _col4 (type: int), _col9 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col2} {VALUE._col3} {VALUE._col8} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col3, _col4, _col9, _col16 + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col4 (type: int), _col9 (type: string), _col16 (type: int) + auto parallelism: false + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col3} {VALUE._col8} {VALUE._col15} + 1 + outputColumnNames: _col4, _col9, _col16 + Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col9 (type: string), _col16 (type: int) + auto parallelism: false + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col8} {VALUE._col15} + 1 + outputColumnNames: _col9, _col16 + Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col9 (type: string), _col16 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 158 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index cb4e07d..842b4b3 100644 --- ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -929,16 +929,18 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -950,23 +952,12 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -978,31 +969,31 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out index 835ee60..f24ae73 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_1.q.out @@ -104,60 +104,59 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: table2 + base file name: table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table2 + name default.table1 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table2 { string key, string value} + serialization.ddl struct table1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 21 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -167,93 +166,67 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table2 + name default.table1 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table2 { string key, string value} + serialization.ddl struct table1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 21 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.table2 - name: default.table2 + name: default.table1 + name: default.table1 Truncated Path -> Alias: - /table2 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /table1 [a] + Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: table1 + base file name: table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table1 + name default.table2 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table1 { string key, string value} + serialization.ddl struct table2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 + totalSize 21 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -263,26 +236,47 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table1 + name default.table2 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table1 { string key, string value} + serialization.ddl struct table2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 + totalSize 21 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.table1 - name: default.table1 + name: default.table2 + name: default.table2 Truncated Path -> Alias: - /table1 [a] + /table2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out index 73ba0b4..33e9e8b 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_2.q.out @@ -104,60 +104,59 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE + alias: a + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: table2 + base file name: table1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table2 + name default.table1 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table2 { string key, string value} + serialization.ddl struct table1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 21 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -167,93 +166,67 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table2 + name default.table1 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table2 { string key, string value} + serialization.ddl struct table1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 21 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.table2 - name: default.table2 + name: default.table1 + name: default.table1 Truncated Path -> Alias: - /table2 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /table1 [a] + Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 21 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: table1 + base file name: table2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table1 + name default.table2 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table1 { string key, string value} + serialization.ddl struct table2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 + totalSize 21 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -263,26 +236,47 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 1 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.table1 + name default.table2 numFiles 1 numRows 0 rawDataSize 0 - serialization.ddl struct table1 { string key, string value} + serialization.ddl struct table2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 20 + totalSize 21 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.table1 - name: default.table1 + name: default.table2 + name: default.table2 Truncated Path -> Alias: - /table1 [a] + /table2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index c6e1b97..aaa0151 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -91,26 +91,54 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Local Work: - Map Reduce Local Work + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -199,26 +227,54 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 - Local Work: - Map Reduce Local Work + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -400,14 +456,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -420,16 +477,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -477,12 +532,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -492,52 +542,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -590,6 +602,47 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -788,14 +841,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -808,16 +862,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -865,12 +917,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -880,57 +927,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -983,6 +987,52 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 3 + numRows 464 + rawDataSize 8519 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1000,7 +1050,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 3 numRows 464 rawDataSize 8519 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out index eda8ca2..9954b77 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin10.q.out @@ -192,35 +192,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -233,22 +232,22 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 3 + name default.srcbucket_mapjoin_part_1 + numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 4200 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -261,16 +260,16 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 #### A masked pattern was here #### Partition base file name: part=2 @@ -280,22 +279,22 @@ STAGE PLANS: part 2 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part_1 + numFiles 3 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -308,62 +307,36 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - /srcbucket_mapjoin_part_2/part=2 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 1737 Data size: 6950 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 869 Data size: 3477 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -376,22 +349,22 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 2 + name default.srcbucket_mapjoin_part_2 + numFiles 3 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -404,16 +377,16 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 #### A masked pattern was here #### Partition base file name: part=2 @@ -423,22 +396,22 @@ STAGE PLANS: part 2 properties: COLUMN_STATS_ACCURATE true - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 3 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 4200 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -451,20 +424,41 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 955 Data size: 3824 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out index bb7214c..ad8f0a5 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin11.q.out @@ -202,35 +202,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -243,44 +242,44 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 4 + name default.srcbucket_mapjoin_part_1 + numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 #### A masked pattern was here #### Partition base file name: part=2 @@ -290,90 +289,64 @@ STAGE PLANS: part 2 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part_1 + numFiles 4 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - /srcbucket_mapjoin_part_2/part=2 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -386,44 +359,44 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 2 + name default.srcbucket_mapjoin_part_2 + numFiles 4 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 #### A masked pattern was here #### Partition base file name: part=2 @@ -433,48 +406,69 @@ STAGE PLANS: part 2 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -609,35 +603,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int), part (type: string) - 1 key (type: int), part (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int), part (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), part (type: string) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -650,44 +643,44 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 4 + name default.srcbucket_mapjoin_part_1 + numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 #### A masked pattern was here #### Partition base file name: part=2 @@ -697,90 +690,64 @@ STAGE PLANS: part 2 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part_1 + numFiles 4 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - /srcbucket_mapjoin_part_2/part=2 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 2140 Data size: 8562 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int), part (type: string) - 1 key (type: int), part (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int), part (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), part (type: string) + Statistics: Num rows: 1070 Data size: 4281 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -793,44 +760,44 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 2 + name default.srcbucket_mapjoin_part_2 + numFiles 4 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 #### A masked pattern was here #### Partition base file name: part=2 @@ -840,48 +807,69 @@ STAGE PLANS: part 2 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1177 Data size: 4709 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out index c0adef4..aa3e2b6 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin12.q.out @@ -161,35 +161,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -208,13 +207,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -224,66 +223,41 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count -1 + bucket_count 2 + bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -302,13 +276,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -318,25 +292,45 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 - bucket_field_name key + bucket_count -1 columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -461,35 +455,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -502,18 +495,19 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count -1 + bucket_count 2 + bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_3 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -529,61 +523,35 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_3 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_3 - name: default.srcbucket_mapjoin_part_3 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_3/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -596,19 +564,18 @@ STAGE PLANS: part 1 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key + bucket_count -1 columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_3 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -624,19 +591,40 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_3 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_3 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_3 + name: default.srcbucket_mapjoin_part_3 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_3/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out index 98d0706..44233f6 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -120,35 +120,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -162,18 +161,18 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE true bucket_count 2 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -189,75 +188,27 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 - Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 #### A masked pattern was here #### Partition - base file name: part=1 + base file name: part=2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 1 + part 2 properties: COLUMN_STATS_ACCURATE true bucket_count 2 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types int:string @@ -294,13 +245,36 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part_1 name: default.srcbucket_mapjoin_part_1 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: part=2 + base file name: part=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 2 + part 1 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -309,13 +283,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -331,20 +305,40 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -464,45 +458,44 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: part=1 + base file name: part=2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 1 + part 2 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -511,13 +504,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -533,71 +526,45 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: part=2 + base file name: part=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 2 + part 1 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -606,13 +573,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -628,19 +595,40 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -759,45 +747,44 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: part=1 + base file name: part=2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 1 + part 2 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -806,13 +793,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -828,71 +815,45 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: part=2 + base file name: part=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 2 + part 1 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -901,13 +862,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -923,19 +884,40 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1056,45 +1038,44 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: part=1 + base file name: part=2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 1 + part 2 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -1103,13 +1084,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1120,76 +1101,50 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count 2 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: part=2 + base file name: part=1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - part 2 + part 1 properties: COLUMN_STATS_ACCURATE true bucket_count 2 @@ -1198,13 +1153,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1215,24 +1170,45 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count 2 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index c6fc8d5..c4702ef 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -158,36 +158,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -200,107 +199,64 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -313,46 +269,87 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [a] + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -555,36 +552,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -597,112 +593,64 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 - numFiles 2 + name default.srcbucket_mapjoin_part + numFiles 4 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 3062 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 4 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -715,46 +663,92 @@ STAGE PLANS: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part - numFiles 4 + name default.srcbucket_mapjoin_part_2 + numFiles 2 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 3062 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 4 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part + name default.srcbucket_mapjoin_part_2 partition_columns ds partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part - name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part/ds=2008-04-08 [a] + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 3 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -772,7 +766,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 3 numRows 564 rawDataSize 10503 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} @@ -974,14 +968,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -994,16 +989,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1056,12 +1049,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -1071,57 +1059,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1222,6 +1167,52 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 3 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1239,7 +1230,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 3 numRows 564 rawDataSize 10503 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index 1ae9e64..7c31e05 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -189,14 +189,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -209,16 +210,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -271,12 +270,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -286,52 +280,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -384,6 +340,47 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -593,14 +590,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -613,16 +611,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 15 Data size: 1583 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -675,12 +671,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -690,57 +681,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 564 - rawDataSize 10503 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -793,6 +741,52 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col7 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 3 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -810,7 +804,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 3 numRows 564 rawDataSize 10503 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index 25c6c7b..a8e892e 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -175,36 +175,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -251,68 +250,25 @@ STAGE PLANS: name: default.srcbucket_mapjoin name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin [a] + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -359,7 +315,48 @@ STAGE PLANS: name: default.srcbucket_mapjoin name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin [a] + /srcbucket_mapjoin [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -543,36 +540,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -619,73 +615,25 @@ STAGE PLANS: name: default.srcbucket_mapjoin name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin [a] + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 2 - numRows 464 - rawDataSize 8519 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -732,7 +680,53 @@ STAGE PLANS: name: default.srcbucket_mapjoin name: default.srcbucket_mapjoin Truncated Path -> Alias: - /srcbucket_mapjoin [a] + /srcbucket_mapjoin [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 1512 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 3 + numRows 464 + rawDataSize 8519 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 8983 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -750,7 +744,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 2 + numFiles 3 numRows 464 rawDataSize 8519 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out index 94952a1..041ba12 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out @@ -225,14 +225,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -245,16 +246,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -302,12 +301,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -317,52 +311,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -463,6 +419,47 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -658,14 +655,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -678,16 +676,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -735,12 +731,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -750,57 +741,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - numFiles 1 - numRows 928 - rawDataSize 17038 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 17966 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -901,6 +849,52 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 3 + numRows 928 + rawDataSize 17038 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 17966 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -918,7 +912,7 @@ STAGE PLANS: columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result - numFiles 1 + numFiles 3 numRows 928 rawDataSize 17038 serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out index ca59d02..54c4be3 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin7.q.out @@ -140,35 +140,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -188,13 +187,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 0 partition_columns ds/hr partition_columns.types string:string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -210,61 +209,36 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns ds/hr partition_columns.types string:string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] + Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col8 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: false - Local Work: - Map Reduce Local Work + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -284,13 +258,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 0 partition_columns ds/hr partition_columns.types string:string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -306,19 +280,40 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns ds/hr partition_columns.types string:string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/ds=2008-04-08/hr=0 [a] + /srcbucket_mapjoin_part_2/ds=2008-04-08/hr=0 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out index f419eaf..da9fe1c 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin8.q.out @@ -126,35 +126,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -173,13 +172,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -189,67 +188,41 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 3 + bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -268,13 +241,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -284,25 +257,46 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - bucket_count 2 + bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -435,35 +429,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -482,13 +475,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -499,66 +492,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count 2 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -577,13 +544,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -594,24 +561,45 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count 2 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out index 8b3848d..5a5e3f6 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin9.q.out @@ -134,13 +134,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,16 +155,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -215,14 +214,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: /srcbucket_mapjoin_part_1/part=1 [a] - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -232,32 +224,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 525 Data size: 2100 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 525 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -310,6 +283,27 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/part=1 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 577 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -476,35 +470,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -518,18 +511,18 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE true bucket_count 2 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -545,61 +538,35 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 687 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 344 Data size: 1377 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -613,18 +580,18 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE true bucket_count 2 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 0 partition_columns part partition_columns.types string rawDataSize 0 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 @@ -640,19 +607,40 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 378 Data size: 1514 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out index 4dbaaa2..5ac3f4c 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out @@ -133,14 +133,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,16 +154,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -210,12 +209,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -225,52 +219,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -323,6 +279,47 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out index 1bf1c41..e4ff965 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out @@ -135,14 +135,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -155,16 +156,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -212,12 +211,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -227,52 +221,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 29 Data size: 3062 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -373,6 +329,47 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 31 Data size: 3368 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out index bfe5438..fce5566 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative3.q.out @@ -195,35 +195,33 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -272,63 +270,24 @@ STAGE PLANS: name: default.test1 name: default.test1 Truncated Path -> Alias: - /test1 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test1 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -377,7 +336,43 @@ STAGE PLANS: name: default.test1 name: default.test1 Truncated Path -> Alias: - /test1 [l] + /test1 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -436,35 +431,33 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -513,63 +506,24 @@ STAGE PLANS: name: default.test2 name: default.test2 Truncated Path -> Alias: - /test2 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test2 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -618,7 +572,43 @@ STAGE PLANS: name: default.test2 name: default.test2 Truncated Path -> Alias: - /test2 [l] + /test2 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -674,35 +664,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: (key + key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 (key + key) (type: double) - 1 UDFToDouble(key) (type: double) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: (key + key) (type: double) + sort order: + + Map-reduce partition columns: (key + key) (type: double) + Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string), value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -751,63 +740,25 @@ STAGE PLANS: name: default.test1 name: default.test1 Truncated Path -> Alias: - /test1 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test1 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (key + key) is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 (key + key) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 11 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string), value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -856,7 +807,43 @@ STAGE PLANS: name: default.test1 name: default.test1 Truncated Path -> Alias: - /test1 [l] + /test1 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 12 Data size: 2420 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -915,55 +902,53 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test2 + base file name: test1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test2 + name default.test1 numFiles 3 - serialization.ddl struct test2 { string key, string value} + serialization.ddl struct test1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -976,99 +961,60 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test2 + name default.test1 numFiles 3 - serialization.ddl struct test2 { string key, string value} + serialization.ddl struct test1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test2 - name: default.test2 + name: default.test1 + name: default.test1 Truncated Path -> Alias: - /test2 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test1 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test1 + base file name: test2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test1 + name default.test2 numFiles 3 - serialization.ddl struct test1 { string key, string value} + serialization.ddl struct test2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1081,23 +1027,59 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test1 + name default.test2 numFiles 3 - serialization.ddl struct test1 { string key, string value} + serialization.ddl struct test2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - name: default.test1 + name: default.test2 + name: default.test2 Truncated Path -> Alias: - /test1 [l] + /test2 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1156,41 +1138,39 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test3 + base file name: test1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1202,9 +1182,9 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test3 + name default.test1 numFiles 3 - serialization.ddl struct test3 { string key, string value} + serialization.ddl struct test1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1222,80 +1202,41 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test3 + name default.test1 numFiles 3 - serialization.ddl struct test3 { string key, string value} + serialization.ddl struct test1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test3 - name: default.test3 + name: default.test1 + name: default.test1 Truncated Path -> Alias: - /test3 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test1 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test1 + base file name: test3 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1307,9 +1248,9 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test1 + name default.test3 numFiles 3 - serialization.ddl struct test1 { string key, string value} + serialization.ddl struct test3 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1327,18 +1268,54 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test1 + name default.test3 numFiles 3 - serialization.ddl struct test1 { string key, string value} + serialization.ddl struct test3 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - name: default.test1 + name: default.test3 + name: default.test3 Truncated Path -> Alias: - /test1 [l] + /test3 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1397,55 +1374,53 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test4 + base file name: test1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test4 + name default.test1 numFiles 3 - serialization.ddl struct test4 { string key, string value} + serialization.ddl struct test1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1458,99 +1433,60 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test4 + name default.test1 numFiles 3 - serialization.ddl struct test4 { string key, string value} + serialization.ddl struct test1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test4 - name: default.test4 + name: default.test1 + name: default.test1 Truncated Path -> Alias: - /test4 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test1 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test1 + base file name: test4 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test1 + name default.test4 numFiles 3 - serialization.ddl struct test1 { string key, string value} + serialization.ddl struct test4 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1563,23 +1499,59 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test1 + name default.test4 numFiles 3 - serialization.ddl struct test1 { string key, string value} + serialization.ddl struct test4 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - name: default.test1 + name: default.test4 + name: default.test4 Truncated Path -> Alias: - /test1 [l] + /test4 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1638,55 +1610,53 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test3 + base file name: test2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test3 + name default.test2 numFiles 3 - serialization.ddl struct test3 { string key, string value} + serialization.ddl struct test2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1699,99 +1669,60 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test3 + name default.test2 numFiles 3 - serialization.ddl struct test3 { string key, string value} + serialization.ddl struct test2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test3 - name: default.test3 + name: default.test2 + name: default.test2 Truncated Path -> Alias: - /test3 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test2 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test2 + base file name: test3 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test2 + name default.test3 numFiles 3 - serialization.ddl struct test2 { string key, string value} + serialization.ddl struct test3 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1804,23 +1735,59 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test2 + name default.test3 numFiles 3 - serialization.ddl struct test2 { string key, string value} + serialization.ddl struct test3 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test2 - name: default.test2 + name: default.test3 + name: default.test3 Truncated Path -> Alias: - /test2 [l] + /test3 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1879,41 +1846,39 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test4 + base file name: test2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1925,9 +1890,9 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test4 + name default.test2 numFiles 3 - serialization.ddl struct test4 { string key, string value} + serialization.ddl struct test2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -1945,80 +1910,41 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test4 + name default.test2 numFiles 3 - serialization.ddl struct test4 { string key, string value} + serialization.ddl struct test2 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test4 - name: default.test4 + name: default.test2 + name: default.test2 Truncated Path -> Alias: - /test4 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test2 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test2 + base file name: test4 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -2030,9 +1956,9 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test2 + name default.test4 numFiles 3 - serialization.ddl struct test2 { string key, string value} + serialization.ddl struct test4 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -2050,18 +1976,54 @@ STAGE PLANS: columns.comments columns.types string:string #### A masked pattern was here #### - name default.test2 + name default.test4 numFiles 3 - serialization.ddl struct test2 { string key, string value} + serialization.ddl struct test4 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test2 - name: default.test2 + name: default.test4 + name: default.test4 Truncated Path -> Alias: - /test2 [l] + /test4 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2120,55 +2082,53 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: r + alias: l Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test4 + base file name: test3 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test4 + name default.test3 numFiles 3 - serialization.ddl struct test4 { string key, string value} + serialization.ddl struct test3 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -2181,99 +2141,60 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name value + bucket_field_name key columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test4 + name default.test3 numFiles 3 - serialization.ddl struct test4 { string key, string value} + serialization.ddl struct test3 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test4 - name: default.test4 + name: default.test3 + name: default.test3 Truncated Path -> Alias: - /test4 [r] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /test3 [l] + Map 3 Map Operator Tree: TableScan - alias: l + alias: r Statistics: Num rows: 21 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 6 Data size: 1200 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test3 + base file name: test4 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test3 + name default.test4 numFiles 3 - serialization.ddl struct test3 { string key, string value} + serialization.ddl struct test4 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 @@ -2286,23 +2207,59 @@ STAGE PLANS: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 3 - bucket_field_name key + bucket_field_name value columns key,value columns.comments columns.types string:string #### A masked pattern was here #### - name default.test3 + name default.test4 numFiles 3 - serialization.ddl struct test3 { string key, string value} + serialization.ddl struct test4 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test3 - name: default.test3 + name: default.test4 + name: default.test4 Truncated Path -> Alias: - /test3 [l] + /test4 [r] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/column_access_stats.q.out ql/src/test/results/clientpositive/spark/column_access_stats.q.out index 79c1249..72b2bd7 100644 --- ql/src/test/results/clientpositive/spark/column_access_stats.q.out +++ ql/src/test/results/clientpositive/spark/column_access_stats.q.out @@ -372,71 +372,63 @@ FROM T1 JOIN T2 ON T1.key = T2.key PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -503,71 +495,63 @@ FROM T1 JOIN T2 ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((val = 3) and key is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((val = 3) and key is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), '3' (type: string), _col5 (type: string), '3' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), '3' (type: string), _col5 (type: string), '3' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -601,79 +585,71 @@ JOIN ON subq1.val = subq2.val PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = 6) and val is not null) (type: boolean) + predicate: ((key = 5) and val is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: val (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((key = 5) and val is not null) (type: boolean) + predicate: ((key = 6) and val is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: val (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -721,20 +697,22 @@ JOIN T3 ON T3.key = T4.key PREHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -742,88 +720,78 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: t3 - Statistics: Num rows: 0 Data size: 35 Basic stats: PARTIAL Column stats: NONE + alias: t2 + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {val} - keys: - 0 _col0 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: t1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: t3 + Statistics: Num rows: 0 Data size: 35 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {key} {val} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/cross_join.q.out ql/src/test/results/clientpositive/spark/cross_join.q.out index 38cc1e3..6d61fc5 100644 --- ql/src/test/results/clientpositive/spark/cross_join.q.out +++ ql/src/test/results/clientpositive/spark/cross_join.q.out @@ -5,32 +5,14 @@ POSTHOOK: query: -- current explain select src.key from src join src src2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,32 +20,39 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Map 2 + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -78,32 +67,14 @@ POSTHOOK: query: -- ansi cross join explain select src.key from src cross join src src2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -111,32 +82,39 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 - 1 - outputColumnNames: _col0 - input vertices: - 1 Map 2 + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -151,71 +129,63 @@ POSTHOOK: query: -- appending condition is allowed explain select src.key from src cross join src src2 on src.key=src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index 9c3ac05..aa30a16 100644 --- ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -135,35 +135,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: g + alias: f Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((value <> '') and value is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col7 (type: string) - 1 value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -213,8 +212,8 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [g] - Map 3 + /filter_join_breaktask/ds=2008-04-08 [f] + Map 4 Map Operator Tree: TableScan alias: m @@ -224,16 +223,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key is not null and value is not null) and (value <> '')) (type: boolean) Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 7 Data size: 59 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -284,76 +281,23 @@ STAGE PLANS: name: default.filter_join_breaktask Truncated Path -> Alias: /filter_join_breaktask/ds=2008-04-08 [m] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan - alias: f + alias: g Statistics: Num rows: 25 Data size: 211 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: ((value <> '') and value is not null) (type: boolean) Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col7 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col7 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col13 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col13 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types int:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 13 Data size: 109 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -403,7 +347,62 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [f] + /filter_join_breaktask/ds=2008-04-08 [g] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col7 + Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: string) + sort order: + + Map-reduce partition columns: _col7 (type: string) + Statistics: Num rows: 14 Data size: 119 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col13 + Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col13 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 15 Data size: 130 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby_position.q.out ql/src/test/results/clientpositive/spark/groupby_position.q.out index 46a1b25..930c2bc 100644 --- ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -541,15 +541,17 @@ FROM ( ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (GROUP, 3) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3), Reducer 6 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -571,36 +573,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (GROUP, 3) - Reducer 5 <- Reducer 4 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 + Map 5 Map Operator Tree: TableScan alias: src2 @@ -618,37 +591,43 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reducer 4 - Local Work: - Map Reduce Local Work + Reducer 2 Reduce Operator Tree: Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - input vertices: - 0 Reducer 2 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: --++ Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: --++ - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE - Reducer 5 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) @@ -661,6 +640,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 3ea2bea..1757d16 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -2711,17 +2711,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -2737,16 +2738,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2800,12 +2799,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2821,57 +2815,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2925,6 +2876,52 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 4 + numRows 10 + rawDataSize 32 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 42 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3079,18 +3076,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 3) + Reducer 4 <- Map 3 (GROUP, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Reducer 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -3102,17 +3099,17 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) + tag: 0 + value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3167,31 +3164,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 3 - Local Work: - Map Reduce Local Work - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -3203,51 +3176,18 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Reducer 3 - Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3301,6 +3241,59 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types string:bigint:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: bigint) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -3486,7 +3479,7 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 + numFiles 3 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} @@ -3516,7 +3509,7 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 + numFiles 3 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} diff --git ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index 69999f5..04f481d 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -2801,17 +2801,18 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -2827,16 +2828,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2890,12 +2889,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2911,57 +2905,14 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 32 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 42 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3015,6 +2966,52 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 4 + numRows 10 + rawDataSize 32 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 42 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3169,19 +3166,19 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 3) - Reducer 4 <- Reducer 3 (GROUP, 3) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 3) + Reducer 5 <- Reducer 4 (GROUP, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Reducer 5 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: t1 @@ -3193,17 +3190,17 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string), val (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 + keys: key (type: string) + mode: final + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: rand() (type: double) + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) + tag: 0 + value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -3258,48 +3255,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 3 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: partials - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col2 (type: bigint) - auto parallelism: false - Reducer 4 - Local Work: - Map Reduce Local Work - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -3311,51 +3267,18 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 + keys: key (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} {_col2} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Reducer 4 - Position of Big Table: 0 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3409,6 +3332,76 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types string:bigint:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: partials + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: bigint) + auto parallelism: false + Reducer 5 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: bigint) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -3612,7 +3605,7 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 + numFiles 3 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} @@ -3642,7 +3635,7 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 1 + numFiles 3 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} diff --git ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out index a44177c..fd65f53 100644 --- ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out +++ ql/src/test/results/clientpositive/spark/index_auto_self_join.q.out @@ -9,71 +9,65 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {key} - keys: - 0 value (type: string) - 1 value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) + predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} - keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 2 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -125,36 +119,14 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - filterExpr: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {key} - keys: - 0 value (type: string) - 1 value (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -166,32 +138,48 @@ STAGE PLANS: Filter Operator predicate: ((value is not null and (key > 80)) and (key < 100)) (type: boolean) Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} - keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 2 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + filterExpr: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value is not null and (key > 70)) and (key < 90)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 314 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/innerjoin.q.out ql/src/test/results/clientpositive/spark/innerjoin.q.out index 7d1b424..a4bd2fc 100644 --- ql/src/test/results/clientpositive/spark/innerjoin.q.out +++ ql/src/test/results/clientpositive/spark/innerjoin.q.out @@ -19,73 +19,66 @@ FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator @@ -1189,32 +1182,14 @@ POSTHOOK: query: explain select * from (select * from src) inner left outer join on inner.key=src.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -1226,32 +1201,44 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {key} {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join0.q.out ql/src/test/results/clientpositive/spark/join0.q.out index aa8e811..01e0c26 100644 --- ql/src/test/results/clientpositive/spark/join0.q.out +++ ql/src/test/results/clientpositive/spark/join0.q.out @@ -15,16 +15,18 @@ SELECT src1.key as k1, src1.value as v1, SORT BY k1, v1, k2, v2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -36,23 +38,11 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src @@ -64,27 +54,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/spark/join1.q.out ql/src/test/results/clientpositive/spark/join1.q.out index 3473f11..8b1edf0 100644 --- ql/src/test/results/clientpositive/spark/join1.q.out +++ ql/src/test/results/clientpositive/spark/join1.q.out @@ -19,73 +19,66 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join10.q.out ql/src/test/results/clientpositive/spark/join10.q.out index 52059a8..d588946 100644 --- ql/src/test/results/clientpositive/spark/join10.q.out +++ ql/src/test/results/clientpositive/spark/join10.q.out @@ -17,16 +17,17 @@ ON (x.key = Y.key) SELECT Y.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -35,24 +36,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan alias: src @@ -61,35 +53,36 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join11.q.out ql/src/test/results/clientpositive/spark/join11.q.out index 2124047..f5f9cbf 100644 --- ql/src/test/results/clientpositive/spark/join11.q.out +++ ql/src/test/results/clientpositive/spark/join11.q.out @@ -19,16 +19,17 @@ JOIN ON src1.c1 = src2.c3 AND src1.c1 < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -37,24 +38,15 @@ STAGE PLANS: predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan alias: src @@ -63,35 +55,36 @@ STAGE PLANS: predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join12.q.out ql/src/test/results/clientpositive/spark/join12.q.out index 98c71c2..d29cc18 100644 --- ql/src/test/results/clientpositive/spark/join12.q.out +++ ql/src/test/results/clientpositive/spark/join12.q.out @@ -25,108 +25,91 @@ JOIN ON src1.c1 = src3.c5 AND src3.c5 < 80 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 100) and (key < 80)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {_col0} - 1 {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 + outputColumnNames: _col0, _col3 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join13.q.out ql/src/test/results/clientpositive/spark/join13.q.out index 3896c0e..81bd948 100644 --- ql/src/test/results/clientpositive/spark/join13.q.out +++ ql/src/test/results/clientpositive/spark/join13.q.out @@ -25,16 +25,18 @@ JOIN ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src @@ -43,98 +45,89 @@ STAGE PLANS: predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col3} - 1 - keys: - 0 (_col0 + _col2) (type: double) - 1 UDFToDouble(_col0) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key < 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col2) is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col3} - 1 - keys: - 0 (_col0 + _col2) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col2) is not null (type: boolean) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col2) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col2) (type: double) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col3} + 1 + outputColumnNames: _col0, _col3 + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join14.q.out ql/src/test/results/clientpositive/spark/join14.q.out index 28f2796..328f90c 100644 --- ql/src/test/results/clientpositive/spark/join14.q.out +++ ql/src/test/results/clientpositive/spark/join14.q.out @@ -21,14 +21,15 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,21 +40,12 @@ STAGE PLANS: Filter Operator predicate: (key > 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -61,33 +53,34 @@ STAGE PLANS: Filter Operator predicate: ((key > 100) and key is not null) (type: boolean) Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 167 Data size: 1774 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 183 Data size: 1951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join15.q.out ql/src/test/results/clientpositive/spark/join15.q.out index b4638cd..9917e36 100644 --- ql/src/test/results/clientpositive/spark/join15.q.out +++ ql/src/test/results/clientpositive/spark/join15.q.out @@ -5,72 +5,65 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key) SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/spark/join16.q.out ql/src/test/results/clientpositive/spark/join16.q.out index fb4268d..34a4cb1 100644 --- ql/src/test/results/clientpositive/spark/join16.q.out +++ ql/src/test/results/clientpositive/spark/join16.q.out @@ -3,13 +3,14 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT subq.key, tab.value FROM (select a.key, a.value from src a where a.key > 10 ) subq JOIN src tab ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) where tab.value < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,21 +25,12 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 key (type: string), value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 9 Data size: 95 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan alias: tab @@ -46,32 +38,32 @@ STAGE PLANS: Filter Operator predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean) Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} + outputColumnNames: _col0, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join17.q.out ql/src/test/results/clientpositive/spark/join17.q.out index a89ddeb..fc5639a 100644 --- ql/src/test/results/clientpositive/spark/join17.q.out +++ ql/src/test/results/clientpositive/spark/join17.q.out @@ -57,36 +57,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -135,68 +134,25 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src2] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /src [src1] + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key1,value1,key2,value2 - columns.comments - columns.types int:string:int:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key1, string value1, i32 key2, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -245,7 +201,48 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src1] + /src [src2] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key1,value1,key2,value2 + columns.comments + columns.types int:string:int:string +#### A masked pattern was here #### + name default.dest1 + serialization.ddl struct dest1 { i32 key1, string value1, i32 key2, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join19.q.out ql/src/test/results/clientpositive/spark/join19.q.out index b54f575..eb121a6 100644 --- ql/src/test/results/clientpositive/spark/join19.q.out +++ ql/src/test/results/clientpositive/spark/join19.q.out @@ -123,16 +123,36 @@ t6.predicate='http://sofa.semanticweb.org/sofa/v1.0/system#__LABEL_REL' ON (t66.subject=t55.object) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: subject (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: t2 @@ -144,18 +164,13 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - 2 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col1 (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) + Map 6 Map Operator Tree: TableScan alias: t3 @@ -167,18 +182,13 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - 2 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col1 (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string) + Map 7 Map Operator Tree: TableScan alias: t4 @@ -190,18 +200,12 @@ STAGE PLANS: expressions: subject (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col2} {_col3} - 1 - 2 {_col1} - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work - Map 5 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 Map Operator Tree: TableScan alias: t5 @@ -213,18 +217,13 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col2} {_col3} - 1 - 2 {_col1} - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work - Map 6 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) + Map 9 Map Operator Tree: TableScan alias: t6 @@ -236,92 +235,69 @@ STAGE PLANS: expressions: subject (type: string), object (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col2} {_col3} {_col7} - 1 {_col1} - keys: - 0 _col7 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 {VALUE._col0} + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: boolean) + value expressions: _col0 (type: string), _col2 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col2} {KEY.reducesinkkey0} + 1 + 2 {VALUE._col0} + outputColumnNames: _col0, _col2, _col3, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: string) + sort order: + + Map-reduce partition columns: _col7 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col2} {VALUE._col3} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col2, _col3, _col7, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: string), _col9 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: subject (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {_col0} - 1 {_col1} - 2 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col1 (type: string) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {_col0} {_col2} {_col3} - 1 - 2 {_col1} - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3, _col7 - input vertices: - 1 Map 4 - 2 Map 5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col2} {_col3} {_col7} - 1 {_col1} - keys: - 0 _col7 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2, _col3, _col7, _col9 - input vertices: - 1 Map 6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col7 (type: string), _col9 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join2.q.out ql/src/test/results/clientpositive/spark/join2.q.out index 40c54cd..38483f9 100644 --- ql/src/test/results/clientpositive/spark/join2.q.out +++ ql/src/test/results/clientpositive/spark/join2.q.out @@ -19,78 +19,45 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 (_col0 + _col5) (type: double) - 1 UDFToDouble(key) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: src3 @@ -98,33 +65,53 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - keys: - 0 (_col0 + _col5) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col11 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col11 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join20.q.out ql/src/test/results/clientpositive/spark/join20.q.out index 7d4feb7..5c69eba 100644 --- ql/src/test/results/clientpositive/spark/join20.q.out +++ ql/src/test/results/clientpositive/spark/join20.q.out @@ -7,13 +7,15 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -24,22 +26,13 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: src2 @@ -47,65 +40,48 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reducer 4 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -692,13 +668,15 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 A SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -709,22 +687,13 @@ STAGE PLANS: Filter Operator predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: src2 @@ -732,65 +701,48 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reducer 4 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git ql/src/test/results/clientpositive/spark/join21.q.out ql/src/test/results/clientpositive/spark/join21.q.out index 7601371..b1f52b4 100644 --- ql/src/test/results/clientpositive/spark/join21.q.out +++ ql/src/test/results/clientpositive/spark/join21.q.out @@ -5,13 +5,15 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -19,22 +21,13 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: src2 @@ -42,65 +35,48 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 {(key < 10)} - 1 - 2 {(key < 10)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reducer 4 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) diff --git ql/src/test/results/clientpositive/spark/join22.q.out ql/src/test/results/clientpositive/spark/join22.q.out index 2c96cef..2e3ff3b 100644 --- ql/src/test/results/clientpositive/spark/join22.q.out +++ ql/src/test/results/clientpositive/spark/join22.q.out @@ -5,105 +5,98 @@ POSTHOOK: query: explain SELECT src5.src1_value FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: src4 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {_col3} - keys: - 0 key (type: string) - 1 _col2 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: src1 + alias: src4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {_col3} - keys: - 0 key (type: string) - 1 _col2 (type: string) - outputColumnNames: _col8 - input vertices: - 0 Map 3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col8 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col2} + outputColumnNames: _col8 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col8 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join23.q.out ql/src/test/results/clientpositive/spark/join23.q.out index 465288c..6924ace 100644 --- ql/src/test/results/clientpositive/spark/join23.q.out +++ ql/src/test/results/clientpositive/spark/join23.q.out @@ -5,72 +5,61 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/spark/join25.q.out ql/src/test/results/clientpositive/spark/join25.q.out index 84e496a..284c97d 100644 --- ql/src/test/results/clientpositive/spark/join25.q.out +++ ql/src/test/results/clientpositive/spark/join25.q.out @@ -21,14 +21,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,21 +40,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: y @@ -61,33 +54,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join26.q.out ql/src/test/results/clientpositive/spark/join26.q.out index 83f45f8..e271184 100644 --- ql/src/test/results/clientpositive/spark/join26.q.out +++ ql/src/test/results/clientpositive/spark/join26.q.out @@ -102,14 +102,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -122,18 +123,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -186,36 +182,29 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -223,13 +212,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -239,96 +226,55 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + /src [y] + Map 4 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col6, _col11 - input vertices: - 0 Map 1 - 2 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -336,11 +282,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -350,26 +298,66 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 {VALUE._col0} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join27.q.out ql/src/test/results/clientpositive/spark/join27.q.out index a578279..d31f29e 100644 --- ql/src/test/results/clientpositive/spark/join27.q.out +++ ql/src/test/results/clientpositive/spark/join27.q.out @@ -21,14 +21,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,21 +40,13 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 value (type: string) - 1 value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Map 3 Map Operator Tree: TableScan alias: y @@ -61,33 +54,33 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join3.q.out ql/src/test/results/clientpositive/spark/join3.q.out index 77e3479..80def7e 100644 --- ql/src/test/results/clientpositive/spark/join3.q.out +++ ql/src/test/results/clientpositive/spark/join3.q.out @@ -19,98 +19,81 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: src3 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: src1 + alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col11 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 {VALUE._col0} + outputColumnNames: _col0, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join30.q.out ql/src/test/results/clientpositive/spark/join30.q.out index f6d2117..7fbbcfa 100644 --- ql/src/test/results/clientpositive/spark/join30.q.out +++ ql/src/test/results/clientpositive/spark/join30.q.out @@ -19,14 +19,16 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -37,23 +39,12 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: y @@ -61,33 +52,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/join36.q.out ql/src/test/results/clientpositive/spark/join36.q.out index a65e979..f1317ea 100644 --- ql/src/test/results/clientpositive/spark/join36.q.out +++ ql/src/test/results/clientpositive/spark/join36.q.out @@ -61,73 +61,67 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: y + alias: x Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {cnt} - 1 {cnt} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE + value expressions: cnt (type: int) + Map 3 Map Operator Tree: TableScan - alias: x + alias: y Statistics: Num rows: 309 Data size: 1482 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {cnt} - 1 {cnt} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 155 Data size: 743 Basic stats: COMPLETE Column stats: NONE + value expressions: cnt (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 170 Data size: 817 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join37.q.out ql/src/test/results/clientpositive/spark/join37.q.out index bdb9aa6..448e983 100644 --- ql/src/test/results/clientpositive/spark/join37.q.out +++ ql/src/test/results/clientpositive/spark/join37.q.out @@ -21,14 +21,15 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,21 +40,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: y @@ -61,33 +54,34 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join39.q.out ql/src/test/results/clientpositive/spark/join39.q.out index 416c92e..0734d4b 100644 --- ql/src/test/results/clientpositive/spark/join39.q.out +++ ql/src/test/results/clientpositive/spark/join39.q.out @@ -21,14 +21,15 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,52 +44,45 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {_col0} {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join4.q.out ql/src/test/results/clientpositive/spark/join4.q.out index e406143..b41adfb 100644 --- ql/src/test/results/clientpositive/spark/join4.q.out +++ ql/src/test/results/clientpositive/spark/join4.q.out @@ -41,81 +41,75 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 15) and (key < 25)) (type: boolean) + predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 10) and (key < 20)) (type: boolean) + predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join40.q.out ql/src/test/results/clientpositive/spark/join40.q.out index b054ad8..60ef13d 100644 --- ql/src/test/results/clientpositive/spark/join40.q.out +++ ql/src/test/results/clientpositive/spark/join40.q.out @@ -9,13 +9,14 @@ EXPLAIN SELECT x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -30,51 +31,44 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {_col0} {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -665,71 +659,64 @@ POSTHOOK: query: EXPLAIN select src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1784,13 +1771,15 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -1801,22 +1790,13 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: src2 @@ -1824,65 +1804,48 @@ STAGE PLANS: Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reducer 4 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -2469,13 +2432,15 @@ SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 A SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -2486,22 +2451,13 @@ STAGE PLANS: Filter Operator predicate: ((key < 15) and (key < 10)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: src2 @@ -2509,65 +2465,48 @@ STAGE PLANS: Filter Operator predicate: ((key < 10) and (key < 15)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 - 2 {(key < 20)} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reducer 4 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) @@ -3154,13 +3093,14 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -3175,51 +3115,44 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {_col0} {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -3810,72 +3743,63 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(1) FROM SRC A JOIN SRC B ON (A.KEY=B.KEY) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/join41.q.out ql/src/test/results/clientpositive/spark/join41.q.out index 0cea03a..4e37621 100644 --- ql/src/test/results/clientpositive/spark/join41.q.out +++ ql/src/test/results/clientpositive/spark/join41.q.out @@ -15,16 +15,28 @@ POSTHOOK: query: EXPLAIN SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: src2 @@ -32,51 +44,33 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -104,16 +98,28 @@ EXPLAIN SELECT * FROM s1 src1 LEFT OUTER JOIN s1 src2 ON (src1.key = src2.key AND src2.key > 10) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: src2 @@ -121,51 +127,33 @@ STAGE PLANS: Filter Operator predicate: (key > 10) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join5.q.out ql/src/test/results/clientpositive/spark/join5.q.out index 9c0f6ba..48bae11 100644 --- ql/src/test/results/clientpositive/spark/join5.q.out +++ ql/src/test/results/clientpositive/spark/join5.q.out @@ -41,14 +41,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -63,21 +64,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan alias: src2 @@ -89,33 +82,34 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join8.q.out ql/src/test/results/clientpositive/spark/join8.q.out index 7761e56..cf234e7 100644 --- ql/src/test/results/clientpositive/spark/join8.q.out +++ ql/src/test/results/clientpositive/spark/join8.q.out @@ -41,84 +41,78 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 2 - Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 326 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 163 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join9.q.out ql/src/test/results/clientpositive/spark/join9.q.out index 620a487..95a872a 100644 --- ql/src/test/results/clientpositive/spark/join9.q.out +++ ql/src/test/results/clientpositive/spark/join9.q.out @@ -73,44 +73,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -118,11 +119,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -132,98 +135,49 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [src2] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /srcpart/ds=2008-04-08/hr=12 [src1] + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col8 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.dest1 - serialization.ddl struct dest1 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -231,13 +185,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -247,23 +199,67 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=12 [src1] + /src [src2] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col8 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.dest1 + serialization.ddl struct dest1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index 5426e15..f75bf52 100644 --- ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -5,32 +5,14 @@ POSTHOOK: query: explain select p1.p_name, p2.p_name from part p1 , part p2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: p2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_name} - 1 {p_name} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,32 +20,40 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_name} - 1 {p_name} - keys: - 0 - 1 - outputColumnNames: _col1, _col13 - input vertices: - 1 Map 2 + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} + 1 {VALUE._col1} + outputColumnNames: _col1, _col13 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -80,99 +70,81 @@ from part p1 ,part p2 ,part p3 where p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_name} - 1 - 2 {p_name} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_name} - 1 {p_name} - 2 - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_name} - 1 {p_name} - 2 {p_name} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - outputColumnNames: _col1, _col13, _col25 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col1, _col13, _col25 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -189,13 +161,14 @@ from part p1 , (select p_name from part) p2 ,part p3 where p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -210,82 +183,63 @@ STAGE PLANS: expressions: p_name (type: string) outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_name} - 1 - 2 {p_name} - keys: - 0 p_name (type: string) - 1 _col0 (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_name} - 1 {_col0} - 2 - keys: - 0 p_name (type: string) - 1 _col0 (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_name} - 1 {_col0} - 2 {p_name} - keys: - 0 p_name (type: string) - 1 _col0 (type: string) - 2 p_name (type: string) - outputColumnNames: _col1, _col12, _col14 - input vertices: - 1 Map 1 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col12) and (_col12 = _col14)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col12 (type: string), _col14 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col1, _col12, _col14 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col12) and (_col12 = _col14)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col12 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -302,17 +256,27 @@ from part p1 , part p2 , part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -320,56 +284,11 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} - 1 {p_partkey} {p_name} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} - 1 {p_partkey} {p_name} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col12, _col13 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col12} - 1 {p_name} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -377,35 +296,54 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} - 1 {p_name} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col12, _col13, _col25 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -424,140 +362,127 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} - 1 - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} - 1 {p_partkey} {p_name} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - outputColumnNames: _col0, _col1, _col12, _col13 - input vertices: - 1 Map 2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col12} - 1 {p_name} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} {_col25} - 1 {p_name} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} - 1 {p_name} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col12, _col13, _col25 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} {_col25} - 1 {p_partkey} {p_name} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 - input vertices: - 1 Map 4 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -576,140 +501,127 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} - 1 - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} - 1 {p_partkey} {p_name} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - outputColumnNames: _col0, _col1, _col12, _col13 - input vertices: - 1 Map 2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col12} - 1 {p_name} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} {_col25} - 1 {p_name} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} - 1 {p_name} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col12, _col13, _col25 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col12} {_col13} {_col25} - 1 {p_partkey} {p_name} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 - input vertices: - 1 Map 4 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col12} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out index b7b6b4a..4dec1cf 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out @@ -5,96 +5,81 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -109,96 +94,81 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_name = p1.p_name and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -213,17 +183,27 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -231,56 +211,11 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -288,32 +223,52 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -328,16 +283,27 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p2.p_partkey = 1 and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -345,16 +311,11 @@ STAGE PLANS: Filter Operator predicate: ((p_partkey = 1) and p_name is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -362,64 +323,49 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index 5892ce9..0e06ca6 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -5,125 +5,97 @@ POSTHOOK: query: explain select * from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 on p2.p_name = p3.p_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 4 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan - alias: p1 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -140,137 +112,127 @@ from part p1 join part p2 join part p3 on p2.p_name = p1.p_name join part p4 on and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 6 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 7 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 1 Map 4 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index 34932a9..2ae56c9 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -7,99 +7,84 @@ from part p1 join part p2 join part p3 where p1.p_name = p2.p_name and p2.p_name = p3.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -116,99 +101,84 @@ from part p1 join part p2 join part p3 where p2.p_name = p1.p_name and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan - alias: p1 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -225,17 +195,27 @@ from part p1 join part p2 join part p3 where p2.p_partkey + p1.p_partkey = p1.p_partkey and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -243,56 +223,11 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -300,35 +235,55 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 370 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -345,16 +300,27 @@ from part p1 join part p2 join part p3 where p2.p_partkey = 1 and p3.p_name = p2.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -362,16 +328,11 @@ STAGE PLANS: Filter Operator predicate: (p_name is not null and (p_partkey = 1)) (type: boolean) Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -379,67 +340,52 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col25 = _col13) (type: boolean) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col25 = _col13) (type: boolean) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index 050291b..d75715a 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -7,128 +7,100 @@ from part p1 join part p2 join part p3 on p1.p_name = p2.p_name join part p4 where p2.p_name = p3.p_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 3 Map Operator Tree: TableScan - alias: p3 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Map 4 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan - alias: p1 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 2 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p_name (type: string) - 2 p_name (type: string) - 3 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -147,140 +119,130 @@ where p2.p_name = p3.p_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2.p_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p_name (type: string), p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 6 Map Operator Tree: TableScan - alias: p4 + alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) + predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 7 Map Operator Tree: TableScan - alias: p3 + alias: p4 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col13 (type: string) - 1 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 1 Map 4 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out index 146cd28..7b204cf 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out @@ -61,16 +61,31 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name and p2_name = p3_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 Map Operator Tree: TableScan alias: p2 @@ -78,18 +93,13 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p3 @@ -97,60 +107,35 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -165,16 +150,31 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 Map Operator Tree: TableScan alias: p2 @@ -182,18 +182,13 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p3 @@ -201,60 +196,35 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -269,16 +239,27 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey + p_partkey = p1.p_partkey and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -286,16 +267,11 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -303,67 +279,52 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -378,16 +339,27 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p2_partkey = 1 and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -395,16 +367,11 @@ STAGE PLANS: Filter Operator predicate: ((p2_partkey = 1) and p2_name is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -412,64 +379,49 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out index c7c0e28..6c1ef22 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out @@ -61,16 +61,31 @@ POSTHOOK: query: explain select * from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 on p2_name = p3_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 Map Operator Tree: TableScan alias: p2 @@ -78,20 +93,13 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p3 @@ -99,20 +107,13 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p4 @@ -120,66 +121,37 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -196,17 +168,33 @@ from part p1 join part2 p2 join part3 p3 on p2_name = p1.p_name join part p4 on and p1.p_partkey = p2_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p2 @@ -214,16 +202,13 @@ STAGE PLANS: Filter Operator predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p2_name (type: string), p2_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string), p2_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 6 Map Operator Tree: TableScan alias: p3 @@ -231,69 +216,13 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p2_name (type: string), p2_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 7 Map Operator Tree: TableScan alias: p4 @@ -301,32 +230,65 @@ STAGE PLANS: Filter Operator predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out index 4f48f51..138a670 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out @@ -63,16 +63,31 @@ from part p1 join part2 p2 join part3 p3 where p1.p_name = p2_name and p2_name = p3_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 Map Operator Tree: TableScan alias: p2 @@ -80,18 +95,13 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p3 @@ -99,63 +109,38 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col13) and (_col13 = _col25)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -172,16 +157,31 @@ from part p1 join part2 p2 join part3 p3 where p2_name = p1.p_name and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 Map Operator Tree: TableScan alias: p2 @@ -189,18 +189,13 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p3 @@ -208,63 +203,38 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 28 Data size: 3460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col1) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -281,16 +251,27 @@ from part p1 join part2 p2 join part3 p3 where p2_partkey + p1.p_partkey = p1.p_partkey and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -298,16 +279,11 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -315,70 +291,55 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) - Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 7 Data size: 951 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and (_col25 = _col13)) (type: boolean) + Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 135 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -395,16 +356,27 @@ from part p1 join part2 p2 join part3 p3 where p2_partkey = 1 and p3_name = p2_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p2 @@ -412,16 +384,11 @@ STAGE PLANS: Filter Operator predicate: (p2_name is not null and (p2_partkey = 1)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_name (type: string), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p3 @@ -429,67 +396,52 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col25 = _col13) (type: boolean) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col25 = _col13) (type: boolean) + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), 1 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out index a0f158a..43ae2ec 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out @@ -63,16 +63,31 @@ from part p1 join part2 p2 join part3 p3 on p1.p_name = p2_name join part p4 where p2_name = p3_name and p1.p_name = p4.p_name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 3 Map Operator Tree: TableScan alias: p2 @@ -80,20 +95,13 @@ STAGE PLANS: Filter Operator predicate: p2_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string) + sort order: + + Map-reduce partition columns: p2_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_partkey (type: int), p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 4 Map Operator Tree: TableScan alias: p3 @@ -101,20 +109,13 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p4 @@ -122,69 +123,40 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - 2 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - 3 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 p_name (type: string) - 1 p2_name (type: string) - 2 p3_name (type: string) - 3 p_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 - Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_name (type: string) + sort order: + + Map-reduce partition columns: p_name (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 2 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 3 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 42 Data size: 5190 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col13 = _col25) and (_col1 = _col37)) (type: boolean) + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1235 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -203,17 +175,33 @@ where p2_name = p3_name and p1.p_partkey = p4.p_partkey and p1.p_partkey = p2_partkey POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_partkey is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_name (type: string), p_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p_name (type: string), p_partkey (type: int) + Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE + value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Map 5 Map Operator Tree: TableScan alias: p2 @@ -221,16 +209,13 @@ STAGE PLANS: Filter Operator predicate: (p2_name is not null and p2_partkey is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p2_name (type: string), p2_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: p2_name (type: string), p2_partkey (type: int) + sort order: ++ + Map-reduce partition columns: p2_name (type: string), p2_partkey (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p2_mfgr (type: string), p2_brand (type: string), p2_type (type: string), p2_size (type: int), p2_container (type: string), p2_retailprice (type: double), p2_comment (type: string) + Map 6 Map Operator Tree: TableScan alias: p3 @@ -238,69 +223,13 @@ STAGE PLANS: Filter Operator predicate: p3_name is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (p_name is not null and p_partkey is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 {p2_partkey} {p2_name} {p2_mfgr} {p2_brand} {p2_type} {p2_size} {p2_container} {p2_retailprice} {p2_comment} - keys: - 0 p_name (type: string), p_partkey (type: int) - 1 p2_name (type: string), p2_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Map 2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} - 1 {p3_partkey} {p3_name} {p3_mfgr} {p3_brand} {p3_type} {p3_size} {p3_container} {p3_retailprice} {p3_comment} - keys: - 0 _col13 (type: string) - 1 p3_name (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Reduce Output Operator + key expressions: p3_name (type: string) + sort order: + + Map-reduce partition columns: p3_name (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: p3_partkey (type: int), p3_mfgr (type: string), p3_brand (type: string), p3_type (type: string), p3_size (type: int), p3_container (type: string), p3_retailprice (type: double), p3_comment (type: string) + Map 7 Map Operator Tree: TableScan alias: p4 @@ -308,35 +237,68 @@ STAGE PLANS: Filter Operator predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} {_col12} {_col13} {_col14} {_col15} {_col16} {_col17} {_col18} {_col19} {_col20} {_col24} {_col25} {_col26} {_col27} {_col28} {_col29} {_col30} {_col31} {_col32} - 1 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - input vertices: - 0 Map 1 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} {VALUE._col12} {KEY.reducesinkkey0} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32 + Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index 87bfb89..8f2d4e0 100644 --- ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -93,43 +93,30 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 50) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter mappings: - 0 [1, 1, 2, 1] - filter predicates: - 0 {(value = 50)} {(value = 60)} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -178,35 +165,25 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [a] Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value = 60) (type: boolean) + predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - filter mappings: - 0 [1, 1, 2, 1] - filter predicates: - 0 {(value = 50)} {(value = 60)} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -255,69 +232,25 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [c] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /a [b] + Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter mappings: - 0 [1, 1, 2, 1] - filter predicates: - 0 {(value = 50)} {(value = 60)} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -366,7 +299,51 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [a] + /a [c] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 0 [1, 1, 2, 1] + filter predicates: + 0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -475,13 +452,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -494,24 +472,14 @@ STAGE PLANS: isSamplingPred: false predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter mappings: - 1 [0, 1, 2, 1] - filter predicates: - 0 - 1 {(value = 50)} {(value = 60)} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -564,31 +532,17 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 60) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - filter mappings: - 1 [0, 1, 2, 1] - filter predicates: - 0 - 1 {(value = 50)} {(value = 60)} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -637,69 +591,25 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [c] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + /a [b] + Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Map Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter mappings: - 1 [0, 1, 2, 1] - filter predicates: - 0 - 1 {(value = 50)} {(value = 60)} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -748,7 +658,51 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [c] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 1 [0, 1, 2, 1] + filter predicates: + 0 + 1 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -871,13 +825,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -890,24 +845,14 @@ STAGE PLANS: isSamplingPred: false predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter mappings: - 1 [0, 2, 2, 2] - filter predicates: - 0 - 1 {(value = 50)} {(value > 10)} {(value = 60)} {(value > 20)} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -960,31 +905,17 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 60) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - filter mappings: - 1 [0, 2, 2, 2] - filter predicates: - 0 - 1 {(value = 50)} {(value > 10)} {(value = 60)} {(value > 20)} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1033,69 +964,25 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [c] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + /a [b] + Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Map Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter mappings: - 1 [0, 2, 2, 2] - filter predicates: - 0 - 1 {(value = 50)} {(value > 10)} {(value = 60)} {(value > 20)} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5 - columns.types int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Filter Operator + isSamplingPred: false + predicate: (value = 60) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1144,7 +1031,51 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [c] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 1 [0, 2, 2, 2] + filter predicates: + 0 + 1 {(VALUE._col0 = 50)} {(VALUE._col0 > 10)} {(VALUE._col0 = 60)} {(VALUE._col0 > 20)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1726,46 +1657,30 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: (value = 50) (type: boolean) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - 3 {key} {value} - filter mappings: - 0 [1, 1, 2, 1, 3, 1] - filter predicates: - 0 {(value = 50)} {(value = 60)} {(value = 40)} - 1 - 2 - 3 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1814,38 +1729,25 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [b] + /a [a] Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value = 60) (type: boolean) + predicate: (value = 50) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - 3 {key} {value} - filter mappings: - 0 [1, 1, 2, 1, 3, 1] - filter predicates: - 0 {(value = 50)} {(value = 60)} {(value = 40)} - 1 - 2 - 3 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1894,38 +1796,25 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [c] + /a [b] Map 4 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value = 40) (type: boolean) + predicate: (value = 60) (type: boolean) Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {value} - filter mappings: - 0 [1, 1, 2, 1, 3, 1] - filter predicates: - 0 {(value = 50)} {(value = 60)} {(value = 40)} - 1 - 2 - 3 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1974,74 +1863,25 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [d] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /a [c] + Map 5 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join0 to 2 - Left Outer Join0 to 3 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - filter mappings: - 0 [1, 1, 2, 1, 3, 1] - filter predicates: - 0 {(value = 50)} {(value = 60)} {(value = 40)} - 1 - 2 - 3 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 - Position of Big Table: 0 - Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 - columns.types int:int:int:int:int:int:int:int - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Filter Operator + isSamplingPred: false + predicate: (value = 40) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + tag: 3 + value expressions: value (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2090,7 +1930,54 @@ STAGE PLANS: name: default.a name: default.a Truncated Path -> Alias: - /a [a] + /a [d] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 0 [1, 1, 2, 1, 3, 1] + filter predicates: + 0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)} {(VALUE._col0 = 40)} + 1 + 2 + 3 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 9 Data size: 59 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types int:int:int:int:int:int:int:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_hive_626.q.out ql/src/test/results/clientpositive/spark/join_hive_626.q.out index 7fa0894..48f7b28 100644 --- ql/src/test/results/clientpositive/spark/join_hive_626.q.out +++ ql/src/test/results/clientpositive/spark/join_hive_626.q.out @@ -65,16 +65,32 @@ select hive_foo.foo_name, hive_bar.bar_name, n from hive_foo join hive_bar on hi hive_bar.foo_id join hive_count on hive_count.bar_id = hive_bar.bar_id POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: hive_foo + Statistics: Num rows: 0 Data size: 15 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: foo_id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: foo_id (type: int) + sort order: + + Map-reduce partition columns: foo_id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: foo_name (type: string) + Map 4 Map Operator Tree: TableScan alias: hive_bar @@ -82,16 +98,13 @@ STAGE PLANS: Filter Operator predicate: (foo_id is not null and bar_id is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {foo_name} - 1 {bar_id} {bar_name} - keys: - 0 foo_id (type: int) - 1 foo_id (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: foo_id (type: int) + sort order: + + Map-reduce partition columns: foo_id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: bar_id (type: int), bar_name (type: string) + Map 5 Map Operator Tree: TableScan alias: hive_count @@ -99,67 +112,49 @@ STAGE PLANS: Filter Operator predicate: bar_id is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} {_col13} - 1 {n} - keys: - 0 _col9 (type: int) - 1 bar_id (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: hive_foo - Statistics: Num rows: 0 Data size: 15 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: foo_id is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {foo_name} - 1 {bar_id} {bar_name} - keys: - 0 foo_id (type: int) - 1 foo_id (type: int) - outputColumnNames: _col1, _col9, _col13 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: bar_id (type: int) + sort order: + + Map-reduce partition columns: bar_id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} {_col13} - 1 {n} - keys: - 0 _col9 (type: int) - 1 bar_id (type: int) - outputColumnNames: _col1, _col13, _col22 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col22 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: n (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} {VALUE._col3} + outputColumnNames: _col1, _col9, _col13 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col9 (type: int) + sort order: + + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col13 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col12} + 1 {VALUE._col0} + outputColumnNames: _col1, _col13, _col22 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col22 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_map_ppr.q.out ql/src/test/results/clientpositive/spark/join_map_ppr.q.out index 8f500be..59fdb99 100644 --- ql/src/test/results/clientpositive/spark/join_map_ppr.q.out +++ ql/src/test/results/clientpositive/spark/join_map_ppr.q.out @@ -104,14 +104,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -124,18 +125,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -188,36 +184,29 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -225,13 +214,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -241,96 +228,55 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + /src [y] + Map 4 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col6, _col11 - input vertices: - 0 Map 1 - 2 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -338,11 +284,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -352,26 +300,66 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] + /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 {VALUE._col0} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -669,14 +657,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -689,18 +678,14 @@ STAGE PLANS: isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - 2 {value} - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(key) (type: double) - 2 UDFToDouble(key) (type: double) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -753,50 +738,41 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - 2 {value} - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(key) (type: double) - 2 UDFToDouble(key) (type: double) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src_copy input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value - columns.comments defaultdefault - columns.types string:string + columns.comments + columns.types int:string #### A masked pattern was here #### - name default.srcpart + name default.src_copy numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src_copy { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -806,113 +782,69 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value - columns.comments defaultdefault - columns.types string:string + columns.comments + columns.types int:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src_copy + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src_copy { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src_copy + name: default.src_copy Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + /src_copy [y] + Map 4 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 {value} - 2 {value} - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(key) (type: double) - 2 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col6, _col11 - input vertices: - 0 Map 1 - 2 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 107 - rawDataSize 2018 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2125 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 2 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src_copy + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value - columns.comments - columns.types int:string + columns.comments defaultdefault + columns.types string:string #### A masked pattern was here #### - name default.src_copy + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src_copy { i32 key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -922,26 +854,71 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value - columns.comments - columns.types int:string + columns.comments defaultdefault + columns.types string:string #### A masked pattern was here #### - name default.src_copy - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src_copy { i32 key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_copy - name: default.src_copy + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src_copy [y] + /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + 2 {VALUE._col1} + outputColumnNames: _col0, _col6, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col11 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows 107 + rawDataSize 2018 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2125 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out index 057c0f7..666bf8b 100644 --- ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out +++ ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out @@ -5,97 +5,78 @@ POSTHOOK: query: explain select count(*) from srcpart a join srcpart b on a.key = b.key and a.hr = b.hr join srcpart c on a.hr = c.hr and a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: string), hr (type: string) - 1 key (type: string), hr (type: string) - 2 key (type: string), hr (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: string), hr (type: string) - 1 key (type: string), hr (type: string) - 2 key (type: string), hr (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: string), hr (type: string) - 1 key (type: string), hr (type: string) - 2 key (type: string), hr (type: string) - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), hr (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), hr (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/join_merging.q.out ql/src/test/results/clientpositive/spark/join_merging.q.out index a67f8b8..0a902a9 100644 --- ql/src/test/results/clientpositive/spark/join_merging.q.out +++ ql/src/test/results/clientpositive/spark/join_merging.q.out @@ -9,13 +9,14 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey p1.p_size > 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -26,73 +27,56 @@ STAGE PLANS: Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Map 3 Map Operator Tree: TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Map 4 Map Operator Tree: TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - outputColumnNames: _col5, _col17 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {VALUE._col4} + 1 {VALUE._col4} + 2 + outputColumnNames: _col5, _col17 + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col17 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int), _col17 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -111,13 +95,14 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey p1.p_size > 10 and p1.p_size > p2.p_size + 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -128,76 +113,59 @@ STAGE PLANS: Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Map 3 Map Operator Tree: TableScan alias: p2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: p_size (type: int) + Map 4 Map Operator Tree: TableScan alias: p3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {p_size} - 1 {p_size} - 2 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - 2 p_partkey (type: int) - outputColumnNames: _col5, _col17 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col5 > (_col17 + 10)) (type: boolean) + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {VALUE._col4} + 1 {VALUE._col4} + 2 + outputColumnNames: _col5, _col17 + Statistics: Num rows: 57 Data size: 6923 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col5 > (_col17 + 10)) (type: boolean) + Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: int), _col17 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int), _col17 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 19 Data size: 2307 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_rc.q.out ql/src/test/results/clientpositive/spark/join_rc.q.out index 6eb98f3..3fb086a 100644 --- ql/src/test/results/clientpositive/spark/join_rc.q.out +++ ql/src/test/results/clientpositive/spark/join_rc.q.out @@ -47,71 +47,64 @@ select join_rc1.key, join_rc2.value FROM join_rc1 JOIN join_rc2 ON join_rc1.key = join_rc2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: join_rc2 + alias: join_rc1 Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: join_rc1 + alias: join_rc2 Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder.q.out ql/src/test/results/clientpositive/spark/join_reorder.q.out index 2b8b233..96b78b1 100644 --- ql/src/test/results/clientpositive/spark/join_reorder.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder.q.out @@ -57,13 +57,14 @@ EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT a.key, a.val, c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -74,21 +75,13 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 UDFToDouble(key) (type: double) - 1 (key + 1) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Map 3 Map Operator Tree: TableScan alias: c @@ -96,32 +89,33 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 UDFToDouble(key) (type: double) - 1 (key + 1) (type: double) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -136,13 +130,14 @@ POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ a.key, a.val, c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,21 +148,13 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 UDFToDouble(key) (type: double) - 1 (key + 1) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Map 3 Map Operator Tree: TableScan alias: c @@ -175,32 +162,33 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 UDFToDouble(key) (type: double) - 1 (key + 1) (type: double) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -251,98 +239,85 @@ POSTHOOK: query: EXPLAIN FROM T1 a SELECT a.key, b.key, a.val, c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Map 2 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col5} - 1 {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} - 1 {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - outputColumnNames: _col0, _col1, _col5, _col11 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col11 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -361,98 +336,85 @@ POSTHOOK: query: EXPLAIN FROM T1 a SELECT /*+ STREAMTABLE(a) */ a.key, b.key, a.val, c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Map 2 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col5} - 1 {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} - 1 {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - outputColumnNames: _col0, _col1, _col5, _col11 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col11 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col1 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder2.q.out ql/src/test/results/clientpositive/spark/join_reorder2.q.out index c5135d5..8b30a73 100644 --- ql/src/test/results/clientpositive/spark/join_reorder2.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder2.q.out @@ -75,125 +75,97 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -235,16 +207,33 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON a.key + 1 = d.key + 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 3), Reducer 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan alias: b @@ -252,16 +241,13 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key + 1) is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 Map Operator Tree: TableScan alias: c @@ -269,16 +255,13 @@ STAGE PLANS: Filter Operator predicate: val is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} - keys: - 0 _col1 (type: string) - 1 val (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) + Map 7 Map Operator Tree: TableScan alias: d @@ -286,80 +269,65 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {val} - keys: - 0 (_col0 + 1) (type: double) - 1 (key + 1) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {val} - keys: - 0 (_col0 + 1) (type: double) - 1 (key + 1) (type: double) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: key (type: string), val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + 1) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + 1) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder3.q.out ql/src/test/results/clientpositive/spark/join_reorder3.q.out index 98dc1a7..8670b48 100644 --- ql/src/test/results/clientpositive/spark/join_reorder3.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder3.q.out @@ -75,125 +75,97 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -235,16 +207,33 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON a.key + 1 = d.key + 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 3), Reducer 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan alias: b @@ -252,16 +241,13 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key + 1) is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 Map Operator Tree: TableScan alias: c @@ -269,16 +255,13 @@ STAGE PLANS: Filter Operator predicate: val is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} - keys: - 0 _col1 (type: string) - 1 val (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) + Map 7 Map Operator Tree: TableScan alias: d @@ -286,80 +269,65 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {val} - keys: - 0 (_col0 + 1) (type: double) - 1 (key + 1) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key is not null and val is not null) and (key + 1) is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {val} - keys: - 0 (_col0 + 1) (type: double) - 1 (key + 1) (type: double) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: key (type: string), val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: (_col0 + 1) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + 1) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_thrift.q.out ql/src/test/results/clientpositive/spark/join_thrift.q.out index 11ffd70..34dbb95 100644 --- ql/src/test/results/clientpositive/spark/join_thrift.q.out +++ ql/src/test/results/clientpositive/spark/join_thrift.q.out @@ -31,71 +31,64 @@ JOIN src_thrift s2 ON s1.aint = s2.aint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: aint is not null (type: boolean) Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {aint} - 1 {lintstring} - keys: - 0 aint (type: int) - 1 aint (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: aint (type: int) + sort order: + + Map-reduce partition columns: aint (type: int) + Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: s1 + alias: s2 Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: aint is not null (type: boolean) Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {aint} - 1 {lintstring} - keys: - 0 aint (type: int) - 1 aint (type: int) - outputColumnNames: _col0, _col17 - input vertices: - 1 Map 2 - Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col17 (type: array>) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: aint (type: int) + sort order: + + Map-reduce partition columns: aint (type: int) + Statistics: Num rows: 6 Data size: 1674 Basic stats: COMPLETE Column stats: NONE + value expressions: lintstring (type: array>) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col3} + outputColumnNames: _col0, _col17 + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col17 (type: array>) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_vc.q.out ql/src/test/results/clientpositive/spark/join_vc.q.out index caa23fe..2c2228e 100644 --- ql/src/test/results/clientpositive/spark/join_vc.q.out +++ ql/src/test/results/clientpositive/spark/join_vc.q.out @@ -11,16 +11,32 @@ POSTHOOK: query: -- see HIVE-4033 earlier a flag named hasVC was not initialized explain select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: t2 @@ -28,16 +44,13 @@ STAGE PLANS: Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 6 Map Operator Tree: TableScan alias: t3 @@ -45,68 +58,47 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {key} {BLOCK__OFFSET__INSIDE__FILE} - keys: - 0 _col6 (type: string) - 1 value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {key} {value} {BLOCK__OFFSET__INSIDE__FILE} - keys: - 0 _col6 (type: string) - 1 value (type: string) - outputColumnNames: _col10, _col11, _col12 - input vertices: - 1 Map 4 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: bigint), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: string) - sort order: +++ - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} + outputColumnNames: _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} + outputColumnNames: _col10, _col11, _col12 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: bigint), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -148,71 +140,64 @@ select t2.BLOCK__OFFSET__INSIDE__FILE from src t1 join src t2 on t1.key = t2.key where t1.key < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {BLOCK__OFFSET__INSIDE__FILE} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 100) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {BLOCK__OFFSET__INSIDE__FILE} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col7 - input vertices: - 1 Map 2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col7 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: BLOCK__OFFSET__INSIDE__FILE (type: bigint) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col1} + outputColumnNames: _col7 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col7 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_view.q.out ql/src/test/results/clientpositive/spark/join_view.q.out index b11b855..88ed110 100644 --- ql/src/test/results/clientpositive/spark/join_view.q.out +++ ql/src/test/results/clientpositive/spark/join_view.q.out @@ -43,26 +43,39 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from v where ds='2011-09-01' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Local Work: - Map Reduce Local Work + Map 3 + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col1, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col6 (type: int), '2011-09-01' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/limit_pushdown.q.out ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 4d1013d..83a5888 100644 --- ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -802,15 +802,17 @@ join on subq.key=subq2.key limit 4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 4 (GROUP, 3) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3), Reducer 6 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -835,38 +837,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Reducer 2 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (GROUP, 3) - Reducer 5 <- Reducer 4 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -888,7 +859,51 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 4 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -904,9 +919,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string), _col1 (type: bigint) - Reducer 5 - Local Work: - Map Reduce Local Work + Reducer 6 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) @@ -918,33 +931,12 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 4 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out index 196deef..e7ec338 100644 --- ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out +++ ql/src/test/results/clientpositive/spark/louter_join_ppr.q.out @@ -105,46 +105,42 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -152,13 +148,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -168,29 +162,55 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -229,77 +249,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -307,11 +264,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -321,26 +280,64 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -489,48 +486,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(ds = '2008-04-08')} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string), ds (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -538,11 +532,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -552,102 +548,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(ds = '2008-04-08')} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col7, _col8 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -688,12 +611,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 - hr 12 + ds 2008-04-09 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -734,12 +657,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -778,14 +701,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + /srcpart/ds=2008-04-09/hr=11 [a] + /srcpart/ds=2008-04-09/hr=12 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -793,13 +739,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -809,26 +753,71 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a] - /srcpart/ds=2008-04-08/hr=12 [a] - /srcpart/ds=2008-04-09/hr=11 [a] - /srcpart/ds=2008-04-09/hr=12 [a] + /src [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(VALUE._col1 = '2008-04-08')} + 1 + outputColumnNames: _col0, _col1, _col7, _col8 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -981,46 +970,42 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} {ds} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1028,13 +1013,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1044,29 +1027,55 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src + Truncated Path -> Alias: + /src [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1107,12 +1116,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-09 - hr 11 + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1153,12 +1162,12 @@ STAGE PLANS: name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-09 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1197,79 +1206,14 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [b] - /srcpart/ds=2008-04-08/hr=12 [b] - /srcpart/ds=2008-04-09/hr=11 [b] - /srcpart/ds=2008-04-09/hr=12 [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} {ds} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col7 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1277,11 +1221,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1291,26 +1237,66 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [a] + /srcpart/ds=2008-04-08/hr=11 [b] + /srcpart/ds=2008-04-08/hr=12 [b] + /srcpart/ds=2008-04-09/hr=11 [b] + /srcpart/ds=2008-04-09/hr=12 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col7 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((_col5 > 15) and (_col5 < 25)) and (_col7 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1463,43 +1449,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1507,11 +1495,13 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1521,97 +1511,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [b] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((key > 10) and (key < 20)) (type: boolean) - Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col7, _col8 - input vertices: - 1 Map 2 - Position of Big Table: 0 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1650,14 +1572,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [a] + /srcpart/ds=2008-04-08/hr=12 [a] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1665,13 +1608,11 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1681,24 +1622,66 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [a] - /srcpart/ds=2008-04-08/hr=12 [a] + /src [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col7, _col8 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 > 15) and (_col7 < 25)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin1.q.out ql/src/test/results/clientpositive/spark/mapjoin1.q.out index 0e5907c..80e38b9 100644 --- ql/src/test/results/clientpositive/spark/mapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin1.q.out @@ -30,13 +30,14 @@ EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -44,60 +45,50 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - filter predicates: - 0 - 1 {true} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {true} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -113,16 +104,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -238 val_238 238 val_238 -238 val_238 238 val_238 -86 val_86 86 val_86 -311 val_311 311 val_311 -311 val_311 311 val_311 -311 val_311 311 val_311 -27 val_27 27 val_27 -165 val_165 165 val_165 -165 val_165 165 val_165 -409 val_409 409 val_409 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +105 val_105 105 val_105 PREHOOK: query: -- func filter on outer join EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 @@ -132,13 +123,14 @@ EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -146,60 +138,50 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - filter predicates: - 0 - 1 {((key * 10) < '1000')} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {((KEY.reducesinkkey0 * 10) < '1000')} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -215,16 +197,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -NULL NULL 238 val_238 -86 val_86 86 val_86 -NULL NULL 311 val_311 -27 val_27 27 val_27 -NULL NULL 165 val_165 -NULL NULL 409 val_409 -NULL NULL 255 val_255 -NULL NULL 278 val_278 -98 val_98 98 val_98 -98 val_98 98 val_98 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +NULL NULL 105 val_105 PREHOOK: query: -- field filter on outer join EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN @@ -236,35 +218,14 @@ SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {_col0} {_col1} - filter predicates: - 0 - 1 {(_col1.key > 200)} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -276,38 +237,50 @@ STAGE PLANS: expressions: key (type: string), named_struct('key',key,'value',value) (type: struct) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {_col0} {_col1} - filter predicates: - 0 - 1 {(_col1.key > 200)} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(VALUE._col0.key > 200)} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -325,16 +298,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -238 val_238 238 {"key":"238","value":"val_238"} -238 val_238 238 {"key":"238","value":"val_238"} -NULL NULL 86 {"key":"86","value":"val_86"} -311 val_311 311 {"key":"311","value":"val_311"} -311 val_311 311 {"key":"311","value":"val_311"} -311 val_311 311 {"key":"311","value":"val_311"} -NULL NULL 27 {"key":"27","value":"val_27"} -NULL NULL 165 {"key":"165","value":"val_165"} -409 val_409 409 {"key":"409","value":"val_409"} -409 val_409 409 {"key":"409","value":"val_409"} +NULL NULL 0 {"key":"0","value":"val_0"} +NULL NULL 0 {"key":"0","value":"val_0"} +NULL NULL 0 {"key":"0","value":"val_0"} +NULL NULL 105 {"key":"105","value":"val_105"} +NULL NULL 111 {"key":"111","value":"val_111"} +NULL NULL 114 {"key":"114","value":"val_114"} +NULL NULL 12 {"key":"12","value":"val_12"} +NULL NULL 12 {"key":"12","value":"val_12"} +NULL NULL 120 {"key":"120","value":"val_120"} +NULL NULL 120 {"key":"120","value":"val_120"} PREHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 PREHOOK: type: QUERY @@ -342,13 +315,14 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND true limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -356,54 +330,47 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -419,16 +386,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -238 val_238 238 val_238 -238 val_238 238 val_238 -86 val_86 86 val_86 -311 val_311 311 val_311 -311 val_311 311 val_311 -311 val_311 311 val_311 -27 val_27 27 val_27 -165 val_165 165 val_165 -165 val_165 165 val_165 -409 val_409 409 val_409 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +105 val_105 105 val_105 PREHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 PREHOOK: type: QUERY @@ -436,13 +403,14 @@ POSTHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on a.key=b.key AND b.key * 10 < '1000' limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -453,21 +421,13 @@ STAGE PLANS: Filter Operator predicate: ((key * 10) < '1000') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -475,35 +435,36 @@ STAGE PLANS: Filter Operator predicate: ((key * 10) < '1000') (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -519,16 +480,16 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN src b on POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -86 val_86 86 val_86 -27 val_27 27 val_27 -98 val_98 98 val_98 -98 val_98 98 val_98 -66 val_66 66 val_66 -37 val_37 37 val_37 -37 val_37 37 val_37 -15 val_15 15 val_15 -15 val_15 15 val_15 -82 val_82 82 val_82 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +12 val_12 12 val_12 PREHOOK: query: EXPLAIN SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 @@ -538,32 +499,14 @@ SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN (select key, named_struct('key', key, 'value', value) as kv from src) b on a.key=b.key AND b.kv.key > 200 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {_col0} {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -578,35 +521,47 @@ STAGE PLANS: Filter Operator predicate: (_col1.key > 200) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {_col0} {_col1} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -624,13 +579,13 @@ POSTHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM src a RIGHT OUTER JOIN POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -238 val_238 238 {"key":"238","value":"val_238"} -238 val_238 238 {"key":"238","value":"val_238"} -311 val_311 311 {"key":"311","value":"val_311"} -311 val_311 311 {"key":"311","value":"val_311"} -311 val_311 311 {"key":"311","value":"val_311"} -409 val_409 409 {"key":"409","value":"val_409"} -409 val_409 409 {"key":"409","value":"val_409"} -409 val_409 409 {"key":"409","value":"val_409"} -255 val_255 255 {"key":"255","value":"val_255"} -255 val_255 255 {"key":"255","value":"val_255"} +201 val_201 201 {"key":"201","value":"val_201"} +207 val_207 207 {"key":"207","value":"val_207"} +207 val_207 207 {"key":"207","value":"val_207"} +207 val_207 207 {"key":"207","value":"val_207"} +207 val_207 207 {"key":"207","value":"val_207"} +213 val_213 213 {"key":"213","value":"val_213"} +213 val_213 213 {"key":"213","value":"val_213"} +213 val_213 213 {"key":"213","value":"val_213"} +213 val_213 213 {"key":"213","value":"val_213"} +216 val_216 216 {"key":"216","value":"val_216"} diff --git ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out index fcc8d6a..dc7241c 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_distinct.q.out @@ -11,75 +11,67 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: partials @@ -90,7 +82,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -156,74 +148,66 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial @@ -288,70 +272,62 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 3) + Reducer 4 <- Reducer 3 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 3) - Reducer 3 <- Reducer 2 (GROUP, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: partial1 @@ -362,7 +338,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) @@ -428,69 +404,61 @@ ON ( c.key=d.key AND c.ds='2008-04-08' AND d.ds='2008-04-08') SELECT /*+ MAPJOIN(d) */ DISTINCT c.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: complete diff --git ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out index 80e730e..3b80437 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out @@ -53,13 +53,15 @@ SELECT /*+ mapjoin(src1, src2) */ * FROM src1 SORT BY src1.key, src2.key, src3.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -70,22 +72,13 @@ STAGE PLANS: Filter Operator predicate: ((key < 300) and (key < 10)) (type: boolean) Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: src2 @@ -93,29 +86,13 @@ STAGE PLANS: Filter Operator predicate: (key < 300) (type: boolean) Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 8 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: src3 @@ -123,39 +100,38 @@ STAGE PLANS: Filter Operator predicate: (key < 300) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 {(key > 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) - sort order: +++ - Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) - Local Work: - Map Reduce Local Work - Reducer 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + sort order: +++ + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out index a8cc26e..fdf8f24 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out @@ -244,13 +244,15 @@ SELECT /*+ mapjoin(src1, src2) */ * FROM src1 SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -258,72 +260,54 @@ STAGE PLANS: TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: src3 + alias: src2 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: src2 + alias: src3 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - sort order: ++++++ - Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/mergejoins.q.out ql/src/test/results/clientpositive/spark/mergejoins.q.out index 0ab5602..220f542 100644 --- ql/src/test/results/clientpositive/spark/mergejoins.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins.q.out @@ -43,16 +43,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from a join b on a.val1=b.val1 join c on a.val1=c.val1 join d on a.val1=d.val1 join e on a.val2=e.val2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: (val1 is not null and val2 is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val1 (type: int) + sort order: + + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Map 4 Map Operator Tree: TableScan alias: b @@ -60,20 +76,13 @@ STAGE PLANS: Filter Operator predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val1} {val2} - 1 {val2} - 2 {val1} {val2} - 3 {val1} {val2} - keys: - 0 val1 (type: int) - 1 val1 (type: int) - 2 val1 (type: int) - 3 val1 (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: val1 (type: int) + sort order: + + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -81,20 +90,13 @@ STAGE PLANS: Filter Operator predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val1} {val2} - 1 {val1} {val2} - 2 {val2} - 3 {val1} {val2} - keys: - 0 val1 (type: int) - 1 val1 (type: int) - 2 val1 (type: int) - 3 val1 (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: val1 (type: int) + sort order: + + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Map 6 Map Operator Tree: TableScan alias: d @@ -102,20 +104,13 @@ STAGE PLANS: Filter Operator predicate: val1 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val1} {val2} - 1 {val1} {val2} - 2 {val1} {val2} - 3 {val2} - keys: - 0 val1 (type: int) - 1 val1 (type: int) - 2 val1 (type: int) - 3 val1 (type: int) - Local Work: - Map Reduce Local Work - Map 5 + Reduce Output Operator + key expressions: val1 (type: int) + sort order: + + Map-reduce partition columns: val1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val2 (type: int) + Map 7 Map Operator Tree: TableScan alias: e @@ -123,75 +118,53 @@ STAGE PLANS: Filter Operator predicate: val2 is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} {_col15} {_col16} - 1 {val1} - keys: - 0 _col1 (type: int) - 1 val2 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a + Reduce Output Operator + key expressions: val2 (type: int) + sort order: + + Map-reduce partition columns: val2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (val1 is not null and val2 is not null) (type: boolean) + value expressions: _col0 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} {VALUE._col14} {VALUE._col15} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16, _col20, _col21 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int), _col20 (type: int), _col21 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - condition expressions: - 0 {val1} {val2} - 1 {val1} {val2} - 2 {val1} {val2} - 3 {val1} {val2} - keys: - 0 val1 (type: int) - 1 val1 (type: int) - 2 val1 (type: int) - 3 val1 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} {_col15} {_col16} - 1 {val1} {val2} - keys: - 0 _col1 (type: int) - 1 val2 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16, _col20, _col21 - input vertices: - 1 Map 5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int), _col20 (type: int), _col21 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -206,99 +179,76 @@ POSTHOOK: query: --HIVE-3070 filter on outer join condition removed while mergin explain select * from src a join src b on a.key=b.key left outer join src c on b.key=c.key and b.key<10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - filter predicates: - 0 - 1 {(key < 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - filter predicates: - 0 - 1 {(key < 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - filter predicates: - 0 - 1 {(key < 10)} - 2 - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out index 8575c5b..3e2c40d 100644 --- ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -19,113 +19,85 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - Left Outer Join0 to 3 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -140,13 +112,14 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) left outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -154,99 +127,70 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - Right Outer Join0 to 3 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 0 Map 1 - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + Right Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -261,13 +205,14 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -275,99 +220,70 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Map 4 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join1 to 2 - Left Outer Join0 to 3 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 0 Map 1 - 1 Map 2 - 3 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + Left Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -382,13 +298,14 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.key=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -396,99 +313,70 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join1 to 2 - Right Outer Join0 to 3 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - 3 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 0 Map 1 - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + Right Outer Join0 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -505,114 +393,100 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 Map Operator Tree: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 4 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -627,14 +501,15 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -642,105 +517,84 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col10} {_col11} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + value expressions: value (type: string) + Map 6 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 0 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col15 (type: string), _col16 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -865,131 +719,121 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) right outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 7 Map Operator Tree: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1004,51 +848,16 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1059,43 +868,40 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - Local Work: - Map Reduce Local Work + value expressions: value (type: string) Map 5 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator @@ -1108,6 +914,38 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: Outer Join 0 to 1 condition expressions: 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} @@ -1139,131 +977,121 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) left outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 + value expressions: value (type: string) + Map 6 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 7 Map Operator Tree: TableScan - alias: c + alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} {_col10} {_col11} - 1 {key} {value} - keys: - 0 _col0 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1278,102 +1106,58 @@ POSTHOOK: query: explain select * from a join a b on (a.key=b.key) right outer join a c on (b.value=c.key) full outer join a d on (a.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 3 + value expressions: value (type: string) + Map 6 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - Local Work: - Map Reduce Local Work - Map 5 + value expressions: value (type: string) + Map 7 Map Operator Tree: TableScan alias: d @@ -1384,6 +1168,38 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -1421,16 +1237,32 @@ explain select * from a join a b on (a.key=b.key) left outer join a c on (b.value=c.key) left outer join a d on (c.key=d.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -1438,103 +1270,73 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {value} - 2 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 6 Map Operator Tree: TableScan alias: d Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {value} - 2 {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {value} - 2 {key} {value} - keys: - 0 _col6 (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out index d6e82fb..06ca49d 100644 --- ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out +++ ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out @@ -214,18 +214,88 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Reducer 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + auto parallelism: false + Path -> Alias: + -mr-10003default.src{} [src] + Path -> Partition: + -mr-10003default.src{} + Partition + base file name: src + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + -mr-10003default.src{} [src] + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -245,12 +315,12 @@ STAGE PLANS: value expressions: _col0 (type: string) auto parallelism: false Path -> Alias: - -mr-10005default.srcpart{ds=2008-04-08, hr=11} [srcpart] - -mr-10006default.srcpart{ds=2008-04-08, hr=12} [srcpart] - -mr-10007default.srcpart{ds=2008-04-09, hr=11} [srcpart] - -mr-10008default.srcpart{ds=2008-04-09, hr=12} [srcpart] + -mr-10004default.srcpart{ds=2008-04-08, hr=11} [srcpart] + -mr-10005default.srcpart{ds=2008-04-08, hr=12} [srcpart] + -mr-10006default.srcpart{ds=2008-04-09, hr=11} [srcpart] + -mr-10007default.srcpart{ds=2008-04-09, hr=12} [srcpart] Path -> Partition: - -mr-10005default.srcpart{ds=2008-04-08, hr=11} + -mr-10004default.srcpart{ds=2008-04-08, hr=11} Partition base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -296,7 +366,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - -mr-10006default.srcpart{ds=2008-04-08, hr=12} + -mr-10005default.srcpart{ds=2008-04-08, hr=12} Partition base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -342,7 +412,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - -mr-10007default.srcpart{ds=2008-04-09, hr=11} + -mr-10006default.srcpart{ds=2008-04-09, hr=11} Partition base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -388,7 +458,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - -mr-10008default.srcpart{ds=2008-04-09, hr=12} + -mr-10007default.srcpart{ds=2008-04-09, hr=12} Partition base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -435,13 +505,47 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - -mr-10005default.srcpart{ds=2008-04-08, hr=11} [srcpart] - -mr-10006default.srcpart{ds=2008-04-08, hr=12} [srcpart] - -mr-10007default.srcpart{ds=2008-04-09, hr=11} [srcpart] - -mr-10008default.srcpart{ds=2008-04-09, hr=12} [srcpart] - Reducer 3 - Local Work: - Map Reduce Local Work + -mr-10004default.srcpart{ds=2008-04-08, hr=11} [srcpart] + -mr-10005default.srcpart{ds=2008-04-08, hr=12} [srcpart] + -mr-10006default.srcpart{ds=2008-04-09, hr=11} [srcpart] + -mr-10007default.srcpart{ds=2008-04-09, hr=12} [srcpart] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -451,123 +555,13 @@ STAGE PLANS: Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: false (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} - 1 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 3 - Position of Big Table: 0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: - -mr-10004default.src{} [src] - Path -> Partition: - -mr-10004default.src{} - Partition - base file name: src - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - -mr-10004default.src{} [src] + tag: 1 + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -1087,18 +1081,87 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: false (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: false + Path -> Alias: + -mr-10003default.src{} [src] + Path -> Partition: + -mr-10003default.src{} + Partition + base file name: src + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + -mr-10003default.src{} [src] + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -1118,12 +1181,12 @@ STAGE PLANS: value expressions: _col0 (type: string) auto parallelism: false Path -> Alias: - -mr-10005default.srcpart{ds=2008-04-08, hr=11} [srcpart] - -mr-10006default.srcpart{ds=2008-04-08, hr=12} [srcpart] - -mr-10007default.srcpart{ds=2008-04-09, hr=11} [srcpart] - -mr-10008default.srcpart{ds=2008-04-09, hr=12} [srcpart] + -mr-10004default.srcpart{ds=2008-04-08, hr=11} [srcpart] + -mr-10005default.srcpart{ds=2008-04-08, hr=12} [srcpart] + -mr-10006default.srcpart{ds=2008-04-09, hr=11} [srcpart] + -mr-10007default.srcpart{ds=2008-04-09, hr=12} [srcpart] Path -> Partition: - -mr-10005default.srcpart{ds=2008-04-08, hr=11} + -mr-10004default.srcpart{ds=2008-04-08, hr=11} Partition base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1169,7 +1232,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - -mr-10006default.srcpart{ds=2008-04-08, hr=12} + -mr-10005default.srcpart{ds=2008-04-08, hr=12} Partition base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1215,7 +1278,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - -mr-10007default.srcpart{ds=2008-04-09, hr=11} + -mr-10006default.srcpart{ds=2008-04-09, hr=11} Partition base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1261,7 +1324,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - -mr-10008default.srcpart{ds=2008-04-09, hr=12} + -mr-10007default.srcpart{ds=2008-04-09, hr=12} Partition base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1308,13 +1371,47 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Truncated Path -> Alias: - -mr-10005default.srcpart{ds=2008-04-08, hr=11} [srcpart] - -mr-10006default.srcpart{ds=2008-04-08, hr=12} [srcpart] - -mr-10007default.srcpart{ds=2008-04-09, hr=11} [srcpart] - -mr-10008default.srcpart{ds=2008-04-09, hr=12} [srcpart] - Reducer 3 - Local Work: - Map Reduce Local Work + -mr-10004default.srcpart{ds=2008-04-08, hr=11} [srcpart] + -mr-10005default.srcpart{ds=2008-04-08, hr=12} [srcpart] + -mr-10006default.srcpart{ds=2008-04-09, hr=11} [srcpart] + -mr-10007default.srcpart{ds=2008-04-09, hr=12} [srcpart] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -1324,123 +1421,12 @@ STAGE PLANS: Limit Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col0} - keys: - 0 - 1 - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: false (type: boolean) + Reduce Output Operator + sort order: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} - 1 {_col0} - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 3 - Position of Big Table: 0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1 - columns.types string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: - -mr-10004default.src{} [src] - Path -> Partition: - -mr-10004default.src{} - Partition - base file name: src - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value - columns.comments defaultdefault - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - -mr-10004default.src{} [src] + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -1532,23 +1518,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Union 2 <- Map 1 (NONE, 0), Map 3 (NONE, 0) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Union 2 <- Map 1 (NONE, 0), Reducer 4 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -1638,55 +1616,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: false (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Position of Big Table: 0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: -#### A masked pattern was here #### + -mr-10003default.src{} [src] Path -> Partition: -#### A masked pattern was here #### + -mr-10003default.src{} Partition base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true @@ -1701,10 +1650,10 @@ STAGE PLANS: rawDataSize 5312 serialization.ddl struct src { string key, string value} serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe totalSize 5812 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.NullStructSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1728,7 +1677,41 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [src] + -mr-10003default.src{} [src] + Map 5 + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Union 2 Vertex: Union 2 @@ -1789,39 +1772,38 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: s2 + alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} - keys: - 0 value (type: string) - 1 value (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: 0 + value expressions: key (type: string) + auto parallelism: false Path -> Alias: - -mr-10005default.src{} [s2] + -mr-10004default.src{} [s1] Path -> Partition: - -mr-10005default.src{} + -mr-10004default.src{} Partition base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1866,67 +1848,29 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10005default.src{} [s2] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + -mr-10004default.src{} [s1] + Map 3 Map Operator Tree: TableScan - alias: s1 + alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: false (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 value (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Position of Big Table: 0 + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + tag: 1 + value expressions: key (type: string) + auto parallelism: false Path -> Alias: - -mr-10004default.src{} [s1] + -mr-10003default.src{} [s2] Path -> Partition: - -mr-10004default.src{} + -mr-10003default.src{} Partition base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -1971,7 +1915,43 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10004default.src{} [s1] + -mr-10003default.src{} [s2] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/parallel_join0.q.out ql/src/test/results/clientpositive/spark/parallel_join0.q.out index a23bc88..f6e5d6c 100644 --- ql/src/test/results/clientpositive/spark/parallel_join0.q.out +++ ql/src/test/results/clientpositive/spark/parallel_join0.q.out @@ -19,16 +19,18 @@ SELECT src1.key as k1, src1.value as v1, SORT BY k1, v1, k2, v2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 4) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -40,23 +42,11 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 4) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src @@ -68,27 +58,26 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - sort order: ++++ - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/spark/parallel_join1.q.out ql/src/test/results/clientpositive/spark/parallel_join1.q.out index 3473f11..0a905b7 100644 --- ql/src/test/results/clientpositive/spark/parallel_join1.q.out +++ ql/src/test/results/clientpositive/spark/parallel_join1.q.out @@ -19,73 +19,66 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index eb00be3..9cf9879 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -2746,35 +2746,35 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2824,51 +2824,25 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [t2] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /pcr_t1/ds=2000-04-08 [t1] + Map 4 Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6, _col7 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2918,8 +2892,30 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [t1] + /pcr_t1/ds=2000-04-08 [t2] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col6, _col7 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator @@ -3058,45 +3054,45 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2000-04-09 + base file name: ds=2000-04-08 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2000-04-09 + ds 2000-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -3136,61 +3132,35 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-09 [t2] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /pcr_t1/ds=2000-04-08 [t1] + Map 4 Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col6, _col7 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: ds=2000-04-08 + base file name: ds=2000-04-09 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2000-04-08 + ds 2000-04-09 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -3230,8 +3200,30 @@ STAGE PLANS: name: default.pcr_t1 name: default.pcr_t1 Truncated Path -> Alias: - /pcr_t1/ds=2000-04-08 [t1] + /pcr_t1/ds=2000-04-09 [t2] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col6, _col7 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: string) + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 11 Data size: 88 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col3 (type: int), _col4 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index 56d6186..5c70ef1 100644 --- ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -19,13 +19,15 @@ WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '5 GROUP BY src1.c1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,23 +45,13 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src @@ -74,40 +66,40 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -155,13 +147,15 @@ WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '5 GROUP BY src1.c1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -176,23 +170,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src @@ -204,40 +188,40 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join.q.out ql/src/test/results/clientpositive/spark/ppd_join.q.out index 02b7d19..b524f4a 100644 --- ql/src/test/results/clientpositive/spark/ppd_join.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join.q.out @@ -21,13 +21,14 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -45,21 +46,13 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan alias: src @@ -74,35 +67,36 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -567,13 +561,14 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -588,21 +583,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan alias: src @@ -614,35 +601,36 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 19 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '20') and ((_col1 < 'val_50') or (_col0 > '2'))) and ((_col2 > '50') or (_col0 < '50'))) and (_col2 <> '4')) (type: boolean) + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join2.q.out ql/src/test/results/clientpositive/spark/ppd_join2.q.out index a2925c7..2786a51 100644 --- ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -27,14 +27,15 @@ ON src1.c2 = src3.c6 WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,21 +53,13 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src @@ -81,37 +74,13 @@ STAGE PLANS: Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col2} {_col3} - 1 {_col0} - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 Map Operator Tree: TableScan alias: src @@ -126,35 +95,55 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} - 1 {_col0} - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 0 Map 2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1731,14 +1720,15 @@ ON src1.c2 = src3.c6 WHERE src1.c1 <> '311' and (src1.c2 <> 'val_50' or src1.c1 > '1') and (src2.c3 <> '10' or src1.c1 <> '10') and (src2.c3 <> '14') and (sqrt(src3.c5) <> 13) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -1753,21 +1743,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src @@ -1779,34 +1761,13 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Map 1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col2} {_col3} - 1 {_col0} - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 Map Operator Tree: TableScan alias: src @@ -1818,35 +1779,52 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} - 1 {_col0} - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 0 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 <> '311') and ((_col1 <> 'val_50') or (_col0 > '1'))) and ((_col2 <> '10') or (_col0 <> '10'))) and (_col2 <> '14')) and (sqrt(_col4) <> 13)) (type: boolean) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join3.q.out ql/src/test/results/clientpositive/spark/ppd_join3.q.out index 75d576d..6f2d5ba 100644 --- ql/src/test/results/clientpositive/spark/ppd_join3.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join3.q.out @@ -27,120 +27,104 @@ ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - 2 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 < '400') (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - 2 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 52 Data size: 558 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) - Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 18 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 52 Data size: 558 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 311 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1774,111 +1758,95 @@ ON src1.c1 = src3.c5 WHERE src1.c1 > '0' and (src1.c2 <> 'val_500' or src1.c1 > '1') and (src2.c3 > '10' or src1.c1 <> '10') and (src2.c3 <> '4') and (src3.c5 <> '1') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col1} - 2 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 3 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + predicate: (((((key <> '12') and (key < '400')) and (key <> '4')) and (key > '0')) and (key <> '1')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((((key <> '11') and (key < '400')) and (key > '0')) and ((value <> 'val_500') or (key > '1'))) and (key <> '4')) and (key <> '1')) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + predicate: (((((key <> '13') and (key < '400')) and (key <> '1')) and (key > '0')) and (key <> '4')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} {_col1} - 2 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 160 Data size: 1705 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) - Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 160 Data size: 1705 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '0') and ((_col1 <> 'val_500') or (_col0 > '1'))) and ((_col2 > '10') or (_col0 <> '10'))) and (_col2 <> '4')) and (_col4 <> '1')) (type: boolean) + Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 93 Data size: 991 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join5.q.out ql/src/test/results/clientpositive/spark/ppd_join5.q.out index ce8cf45..fc32b70 100644 --- ql/src/test/results/clientpositive/spark/ppd_join5.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join5.q.out @@ -43,16 +43,31 @@ select a.*,b.d d1,c.d d2 from join t2 c on (a.id2 = b.id) where b.d <= 1 and c.d <= 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (id1 is not null and id2 is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id1 (type: string), id2 (type: string) + sort order: ++ + Map-reduce partition columns: id1 (type: string), id2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -60,16 +75,13 @@ STAGE PLANS: Filter Operator predicate: (id is not null and (d <= 1)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {id1} {id2} - 1 {d} - keys: - 0 id1 (type: string), id2 (type: string) - 1 id (type: string), id (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: id (type: string), id (type: string) + sort order: ++ + Map-reduce partition columns: id (type: string), id (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -77,67 +89,45 @@ STAGE PLANS: Filter Operator predicate: (d <= 1) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col6} - 1 {d} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (id1 is not null and id2 is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {id1} {id2} - 1 {d} - keys: - 0 id1 (type: string), id2 (type: string) - 1 id (type: string), id (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + sort order: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col6} - 1 {d} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col6, _col11 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: d (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col6} + 1 {VALUE._col1} + outputColumnNames: _col0, _col1, _col6, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -160,16 +150,31 @@ select a.*,b.d d1,c.d d2 from ) z where d1 > 1 or d2 > 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (id1 is not null and id2 is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id1 (type: string), id2 (type: string) + sort order: ++ + Map-reduce partition columns: id1 (type: string), id2 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -177,16 +182,13 @@ STAGE PLANS: Filter Operator predicate: (id is not null and (d <= 1)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {id1} {id2} - 1 {d} - keys: - 0 id1 (type: string), id2 (type: string) - 1 id (type: string), id (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: id (type: string), id (type: string) + sort order: ++ + Map-reduce partition columns: id (type: string), id (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -194,70 +196,48 @@ STAGE PLANS: Filter Operator predicate: (d <= 1) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col6} - 1 {d} - keys: - 0 - 1 - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (id1 is not null and id2 is not null) (type: boolean) + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col6} + 1 {VALUE._col1} + outputColumnNames: _col0, _col1, _col6, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: ((_col6 > 1) or (_col11 > 1)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {id1} {id2} - 1 {d} - keys: - 0 id1 (type: string), id2 (type: string) - 1 id (type: string), id (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 2 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col6} - 1 {d} - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col6, _col11 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: ((_col6 > 1) or (_col11 > 1)) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out index 04b2548..13b186b 100644 --- ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -120,15 +120,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -204,39 +204,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 2 - Local Work: - Map Reduce Local Work - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -246,47 +214,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 - input vertices: - 1 Reducer 2 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -336,6 +270,67 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -499,15 +494,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 3) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -583,39 +578,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 2 - Local Work: - Map Reduce Local Work - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -625,47 +588,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 - input vertices: - 1 Reducer 2 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -715,6 +644,67 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -878,15 +868,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 3) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -962,39 +952,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 2 - Local Work: - Map Reduce Local Work - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -1004,47 +962,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 - input vertices: - 1 Reducer 2 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1094,6 +1018,67 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1257,15 +1242,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 3) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -1341,39 +1326,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Reducer 2 - Local Work: - Map Reduce Local Work - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -1383,47 +1336,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {_col3} {_col4} - keys: - 0 key (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col8, _col9 - input vertices: - 1 Reducer 2 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2 - columns.types string:double:double - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1473,6 +1392,67 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col2} {VALUE._col3} + outputColumnNames: _col0, _col8, _col9 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col8 (type: double), _col9 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types string:double:double + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out index 124510c..277214e 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join1.q.out @@ -21,74 +21,68 @@ EXPLAIN WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > 10) and (_col0 < 20)) and (_col5 > 15)) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -141,74 +135,68 @@ POSTHOOK: query: EXPLAIN WHERE a.key > 10 AND a.key < 20 AND b.key > 15 AND b.key < 25 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col5 > 15) and (_col5 < 25)) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out index 375ba82..48967a7 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join2.q.out @@ -21,13 +21,14 @@ EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -38,21 +39,13 @@ STAGE PLANS: Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -60,35 +53,36 @@ STAGE PLANS: Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -261,13 +255,14 @@ POSTHOOK: query: EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -278,21 +273,13 @@ STAGE PLANS: Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -300,35 +287,36 @@ STAGE PLANS: Filter Operator predicate: ((key > '15') and (key < '25')) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 > '10') and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out index 11c7c14..d88e683 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join4.q.out @@ -27,13 +27,14 @@ EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -44,18 +45,13 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -63,23 +59,13 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c @@ -87,39 +73,37 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) and (sqrt(_col10) <> 13)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((_col0 > '10') and (_col0 < '20')) and (_col5 > '15')) and (_col5 < '25')) and (sqrt(_col10) <> 13)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -408,13 +392,14 @@ POSTHOOK: query: EXPLAIN WHERE a.key > '10' AND a.key < '20' AND b.key > '15' AND b.key < '25' AND sqrt(c.key) <> 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -425,18 +410,13 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -444,23 +424,13 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c @@ -468,39 +438,37 @@ STAGE PLANS: Filter Operator predicate: (sqrt(key) <> 13) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col5 > '15') and (_col5 < '25')) and (_col0 > '10')) and (_col0 < '20')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out index 537497c..0e359eb 100644 --- ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/ppd_outer_join5.q.out @@ -121,96 +121,79 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t2.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {id} {key} {value} - keys: - 0 20 (type: int) - 1 20 (type: int) - 2 id (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Map 3 Map Operator Tree: TableScan - alias: t3 + alias: t2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 20 (type: int) - 1 20 (type: int) - 2 id (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 4 Map Operator Tree: TableScan - alias: t1 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {id} {key} {value} - keys: - 0 20 (type: int) - 1 20 (type: int) - 2 id (type: int) - outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 - input vertices: - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -223,96 +206,79 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 join t2 on (t1.id=t2.id) left outer join t3 on (t1.id=t3.id) where t2.id=20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {id} {key} {value} - keys: - 0 20 (type: int) - 1 20 (type: int) - 2 id (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) Map 3 Map Operator Tree: TableScan - alias: t3 + alias: t2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 20 (type: int) - 1 20 (type: int) - 2 id (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: 20 (type: int) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 4 Map Operator Tree: TableScan - alias: t1 + alias: t3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator predicate: (id = 20) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {id} {key} {value} - keys: - 0 20 (type: int) - 1 20 (type: int) - 2 id (type: int) - outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 - input vertices: - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + 2 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col1, _col2, _col7, _col8, _col12, _col13, _col14 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 20 (type: int), _col1 (type: string), _col2 (type: string), 20 (type: int), _col7 (type: string), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/router_join_ppr.q.out ql/src/test/results/clientpositive/spark/router_join_ppr.q.out index 7a1462c..6e20e16 100644 --- ql/src/test/results/clientpositive/spark/router_join_ppr.q.out +++ ql/src/test/results/clientpositive/spark/router_join_ppr.q.out @@ -105,13 +105,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -124,21 +125,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(ds = '2008-04-08')} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -188,12 +182,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -203,56 +192,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - filter mappings: - 1 [0, 1] - filter predicates: - 0 - 1 {(ds = '2008-04-08')} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string), ds (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -445,6 +392,51 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [b] /srcpart/ds=2008-04-09/hr=11 [b] /srcpart/ds=2008-04-09/hr=12 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + filter mappings: + 1 [0, 1] + filter predicates: + 0 + 1 {(VALUE._col1 = '2008-04-08')} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -597,13 +589,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -616,16 +609,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -724,12 +715,7 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -739,51 +725,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col7, _col8 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -833,6 +782,46 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col7, _col8 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -981,13 +970,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -1000,16 +990,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1059,12 +1047,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -1074,51 +1057,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1217,6 +1163,46 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [b] /srcpart/ds=2008-04-08/hr=12 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 122 Data size: 1296 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -1365,13 +1351,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -1384,16 +1371,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} {ds} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 222 Data size: 2358 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string), ds (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1586,12 +1571,7 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [a] /srcpart/ds=2008-04-09/hr=11 [a] /srcpart/ds=2008-04-09/hr=12 [a] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -1601,51 +1581,14 @@ STAGE PLANS: isSamplingPred: false predicate: ((key > 15) and (key < 25)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} {ds} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col7, _col8 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean) - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1695,6 +1638,46 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col7, _col8 + Statistics: Num rows: 244 Data size: 2593 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (((_col0 > 10) and (_col0 < 20)) and (_col2 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col7 (type: string), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 13 Data size: 138 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/sample8.q.out ql/src/test/results/clientpositive/spark/sample8.q.out index f211e70..772111b 100644 --- ql/src/test/results/clientpositive/spark/sample8.q.out +++ ql/src/test/results/clientpositive/spark/sample8.q.out @@ -85,13 +85,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,16 +105,13 @@ STAGE PLANS: isSamplingPred: true predicate: (((((hash(key) & 2147483647) % 10) = 0) and value is not null) and (((hash(key) & 2147483647) % 1) = 0)) (type: boolean) Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -165,12 +163,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: t @@ -180,51 +173,13 @@ STAGE PLANS: isSamplingPred: true predicate: (((((hash(key) & 2147483647) % 1) = 0) and value is not null) and (((hash(key) & 2147483647) % 10) = 0)) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - outputColumnNames: _col0, _col1, _col7, _col8 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col7 = _col0) and (_col8 = _col1)) (type: boolean) - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3 - columns.types string:string:string:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -417,6 +372,46 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [t] /srcpart/ds=2008-04-09/hr=11 [t] /srcpart/ds=2008-04-09/hr=12 [t] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col7, _col8 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col7 = _col0) and (_col8 = _col1)) (type: boolean) + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -805,36 +800,14 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src TABLESAMPLE(100 ROWS) a JOIN src1 TABLESAMPLE(10 ROWS) b ON a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Row Limit Per Split: 10 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -846,32 +819,48 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Row Limit Per Split: 10 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -902,36 +891,14 @@ POSTHOOK: query: EXPLAIN SELECT * FROM src TABLESAMPLE(100 ROWS) a, src1 TABLESAMPLE(10 ROWS) b WHERE a.key=b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Row Limit Per Split: 10 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -943,35 +910,51 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = _col5) (type: boolean) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Row Limit Per Split: 10 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col5) (type: boolean) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out index 4cc5213..2b8e04b 100644 --- ql/src/test/results/clientpositive/spark/semijoin.q.out +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -119,16 +119,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -145,51 +161,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -229,16 +221,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -255,51 +263,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -341,16 +325,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -367,51 +367,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -445,16 +421,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -467,55 +459,31 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: int) - 1 _col1 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 @@ -560,16 +528,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -586,51 +570,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -667,13 +627,15 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -693,23 +655,12 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 51 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -717,29 +668,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator @@ -778,13 +730,15 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -804,23 +758,12 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -828,29 +771,30 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator @@ -886,13 +830,15 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -912,23 +858,12 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -936,25 +871,26 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator @@ -995,16 +931,31 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -1021,51 +972,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 @@ -1118,16 +1045,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -1144,51 +1087,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: (2 * _col0) (type: int) + sort order: + + Map-reduce partition columns: (2 * _col0) (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -1226,13 +1145,15 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -1243,20 +1164,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 4 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -1271,60 +1201,32 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - 2 Map 4 - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) - Local Work: - Map Reduce Local Work + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Semi Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Reducer 3 Reduce Operator Tree: Select Operator @@ -1376,16 +1278,31 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: string) + Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -1402,51 +1319,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 6 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -1494,16 +1387,31 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -1520,18 +1428,12 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 43 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c @@ -1548,57 +1450,29 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + Left Semi Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 24 Data size: 178 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 @@ -1648,32 +1522,38 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c @@ -1687,54 +1567,29 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Semi Join 1 to 2 - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Semi Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 @@ -1796,32 +1651,38 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - Map 4 + alias: a + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c @@ -1835,52 +1696,27 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Left Semi Join 1 to 2 - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 0 Map 1 - 2 Map 4 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Semi Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Select Operator @@ -2079,16 +1915,28 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -2102,70 +1950,39 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Outer Join0 to 2 - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + Left Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 @@ -2230,13 +2047,15 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -2244,18 +2063,12 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -2269,53 +2082,38 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 4 <- Map 3 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join0 to 2 - condition expressions: - 0 {key} - 1 - 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - input vertices: - 0 Map 1 - 1 Map 2 - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reducer 4 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + Right Outer Join0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + 2 + outputColumnNames: _col0 + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 48 Data size: 358 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) @@ -2528,16 +2326,33 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Reducer 3 (SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 5 Map Operator Tree: TableScan alias: b @@ -2554,78 +2369,53 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: c Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 81 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 12 Data size: 89 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 13 Data size: 97 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 @@ -2696,16 +2486,31 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and (key > 100)) (type: boolean) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Map 3 Map Operator Tree: TableScan alias: b @@ -2722,54 +2527,32 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 value (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 22 Data size: 163 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value is not null and (key > 100)) (type: boolean) - Statistics: Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 value (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 2 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoin.q.out ql/src/test/results/clientpositive/spark/skewjoin.q.out index 58b91b8..56b78be 100644 --- ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -79,73 +79,66 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 Stage: Stage-0 Move Operator @@ -194,125 +187,97 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -354,125 +319,97 @@ FROM T1 a JOIN T2 b ON a.key = b.key JOIN T4 d ON c.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: d Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - Inner Join 2 to 3 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - 3 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - 3 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - input vertices: - 1 Map 2 - 2 Map 3 - 3 Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 2 to 3 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + 3 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -506,13 +443,15 @@ PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM T1 a JOIN src c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -523,23 +462,13 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 UDFToDouble(key) (type: double) - 1 (key + 1) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Map 4 Map Operator Tree: TableScan alias: c @@ -547,30 +476,31 @@ STAGE PLANS: Filter Operator predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} - keys: - 0 UDFToDouble(key) (type: double) - 1 (key + 1) (type: double) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 0 Map 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col5)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -622,16 +552,18 @@ ON (x.key = Y.key) SELECT sum(hash(Y.key)), sum(hash(Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src @@ -640,26 +572,15 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: src @@ -668,35 +589,36 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial @@ -754,81 +676,73 @@ ON (x.key = Y.key and substring(x.value, 5)=substring(y.value, 5)+1) SELECT sum(hash(Y.key)), sum(hash(Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean) + predicate: (key is not null and UDFToDouble(substring(value, 5)) is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {_col1} - keys: - 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) - 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key is not null and UDFToDouble(substring(value, 5)) is not null) (type: boolean) + predicate: (key is not null and (substring(value, 5) + 1) is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {_col0} {_col1} - keys: - 0 _col0 (type: string), UDFToDouble(substring(_col1, 5)) (type: double) - 1 _col0 (type: string), (substring(_col1, 5) + 1) (type: double) - outputColumnNames: _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), (substring(_col1, 5) + 1) (type: double) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial @@ -894,110 +808,92 @@ JOIN ON src1.c1 = src3.c5 AND src3.c5 < 80 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 80) and (key < 100)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 100) and (key < 80)) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: (((key < 100) and (key < 80)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < 80) and (key < 100)) (type: boolean) + predicate: ((key < 100) and (key < 80)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {_col0} - 1 {_col1} - 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col3)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 + outputColumnNames: _col0, _col3 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col3)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial @@ -1053,34 +949,15 @@ POSTHOOK: query: EXPLAIN SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1 k LEFT OUTER JOIN T1 v ON k.key+1=v.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: v - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {val} - keys: - 0 (key + 1) (type: double) - 1 UDFToDouble(key) (type: double) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1088,32 +965,44 @@ STAGE PLANS: TableScan alias: k Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} - 1 {val} - keys: - 0 (key + 1) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col6)) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: (key + 1) (type: double) + sort order: + + Map-reduce partition columns: (key + 1) (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: key (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: v + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + value expressions: val (type: string) Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col6)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out index 1e5a71c..a6ac032 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out @@ -5,71 +5,63 @@ POSTHOOK: query: explain create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -92,7 +84,7 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Create Table Operator: Create Table columns: key string, value string diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out index db4fdc2..db92598 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out @@ -61,122 +61,112 @@ EXPLAIN SELECT * FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -213,14 +203,16 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -231,88 +223,41 @@ STAGE PLANS: Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 5 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 3 - Vertex: Union 3 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan alias: a @@ -320,15 +265,50 @@ STAGE PLANS: Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -373,102 +353,114 @@ INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Union 3 + Vertex: Union 3 Stage: Stage-0 Move Operator @@ -480,28 +472,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - PREHOOK: query: INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY @@ -543,14 +513,16 @@ INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -561,82 +533,92 @@ STAGE PLANS: Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 5 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Union 3 Vertex: Union 3 @@ -650,28 +632,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - PREHOOK: query: INSERT OVERWRITE TABLE DEST1 SELECT * FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out index 72e6182..bbe60a7 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out @@ -73,135 +73,61 @@ EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 7 - Map Operator Tree: - TableScan - alias: a - Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 5 - 2 Map 6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 Map Operator Tree: TableScan alias: b @@ -209,18 +135,13 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 6 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 8 Map Operator Tree: TableScan alias: c @@ -228,17 +149,68 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out index 483f7a9..c3d550b 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out @@ -47,122 +47,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -199,14 +189,16 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -217,88 +209,41 @@ STAGE PLANS: Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 5 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 3 - Vertex: Union 3 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan alias: a @@ -306,15 +251,50 @@ STAGE PLANS: Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -353,94 +333,87 @@ EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) - Reducer 3 <- Union 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 4 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 6 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 5 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -458,30 +431,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -507,14 +473,17 @@ POSTHOOK: query: EXPLAIN SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -525,75 +494,65 @@ STAGE PLANS: Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 3 <- Map 2 (NONE, 0), Map 5 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key = '2') or (key = '3')) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 1 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 6 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (not ((key = '2') or (key = '3'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -612,31 +571,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Union 3 Vertex: Union 3 - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (not ((key = '2') or (key = '3'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out index b125f6a..4bb908a 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out @@ -49,14 +49,16 @@ explain select * from (select a.key as key, b.value as array_val from T1 a join array_valued_T1 b on a.key=b.key) i lateral view explode (array_val) c as val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -67,140 +69,40 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (key = '8')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 1 - Select Operator - expressions: _col0 (type: string), _col6 (type: array) - outputColumnNames: _col0, _col1 - Lateral View Forward - Select Operator - SELECT * : (no compute) - expressions: _col0 (type: string), _col1 (type: array) - outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: _col1 (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: array) + Map 5 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 0 Map 5 - Select Operator - expressions: _col0 (type: string), _col6 (type: array) - outputColumnNames: _col0, _col1 - Lateral View Forward - Select Operator - SELECT * : (no compute) - expressions: _col0 (type: string), _col1 (type: array) - outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Select Operator - expressions: _col1 (type: array) - outputColumnNames: _col0 - UDTF Operator - function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 3 - Vertex: Union 3 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: array) + Map 7 Map Operator Tree: TableScan alias: a @@ -208,15 +110,101 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Select Operator + expressions: _col0 (type: string), _col6 (type: array) + outputColumnNames: _col0, _col1 + Lateral View Forward + Select Operator + SELECT * : (no compute) + expressions: _col0 (type: string), _col1 (type: array) + outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col1 (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Select Operator + expressions: _col0 (type: string), _col6 (type: array) + outputColumnNames: _col0, _col1 + Lateral View Forward + Select Operator + SELECT * : (no compute) + expressions: _col0 (type: string), _col1 (type: array) + outputColumnNames: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc, org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col1 (type: array) + outputColumnNames: _col0 + UDTF Operator + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: array), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out index 83fbe47..bb1111a 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out @@ -59,178 +59,77 @@ select * from ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3, Stage-4, Stage-5 - Stage-3 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-2, Stage-3 - Stage-5 depends on stages: Stage-2, Stage-3, Stage-4 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0), Map 7 (NONE, 0), Map 9 (NONE, 0) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 12 (NONE, 0), Reducer 2 (NONE, 0), Reducer 6 (NONE, 0), Reducer 9 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 10 Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 5 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 7 + predicate: (key is not null and (not (key = '2'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 11 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 9 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 13 Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col6 - input vertices: - 1 Map 8 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -238,42 +137,40 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan alias: b Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) + predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-5 - Spark -#### A masked pattern was here #### - Vertices: + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 8 Map Operator Tree: TableScan @@ -282,15 +179,98 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out index 037170a..468139c 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out @@ -49,122 +49,108 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') and (val = '12')) or ((key = '8') and (val = '18'))) or ((key = '3') and (val = '13'))))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out index 8e0ab17..75e5af1 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out @@ -75,16 +75,32 @@ T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and val is not null) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan alias: b @@ -92,16 +108,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan alias: c @@ -109,67 +122,49 @@ STAGE PLANS: Filter Operator predicate: val is not null (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} - keys: - 0 _col1 (type: string) - 1 val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and val is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col5} {_col6} - 1 {key} {val} - keys: - 0 _col1 (type: string) - 1 val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} {VALUE._col4} {VALUE._col5} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out index 6429622..b3dcb0a 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out @@ -79,69 +79,50 @@ T1 a join T2 b on a.key = b.key join T3 c on a.val = c.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1) - Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 8 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 + Map Operator Tree: + TableScan alias: c Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator @@ -156,29 +137,46 @@ STAGE PLANS: Map 7 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not (key = '2'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 9 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 6 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 4 Reduce Operator Tree: Join Operator condition map: @@ -199,30 +197,22 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out index a35ebc1..e62e886 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out @@ -87,122 +87,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -239,14 +229,16 @@ EXPLAIN SELECT a.*, b.* FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -257,88 +249,41 @@ STAGE PLANS: Filter Operator predicate: ((key = 2) or (key = 3)) (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 3 <- Map 2 (NONE, 0), Map 4 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key = 2) or (key = 3)) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 4 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 5 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 3 - Vertex: Union 3 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan alias: a @@ -346,15 +291,50 @@ STAGE PLANS: Filter Operator predicate: (not ((key = 2) or (key = 3))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {val} - 1 {key} {val} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -393,94 +373,87 @@ EXPLAIN SELECT count(1) FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) - Reducer 3 <- Union 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and ((key = 2) or (key = 3))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 4 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 5 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -498,30 +471,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (not ((key = 2) or (key = 3)))) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -547,14 +513,17 @@ POSTHOOK: query: EXPLAIN SELECT count(1) FROM T1 a RIGHT OUTER JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -565,75 +534,65 @@ STAGE PLANS: Filter Operator predicate: ((key = 2) or (key = 3)) (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 3 <- Map 2 (NONE, 0), Map 5 (NONE, 0) - Reducer 4 <- Union 3 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key = 2) or (key = 3)) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 0 Map 1 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Map 5 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 0 Map 6 - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not ((key = 2) or (key = 3))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -652,31 +611,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Union 3 Vertex: Union 3 - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not ((key = 2) or (key = 3))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out index 1e96de0..771b16e 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out @@ -49,122 +49,108 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out index 7d14ac1..f59f1e4 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out @@ -53,122 +53,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -261,122 +251,108 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (((key = '2') and (val = '12')) or (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not (((key = '2') and (val = '12')) or (key = '2')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out index 2ee4ed7..ddb975e 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt18.q.out @@ -75,35 +75,14 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(key) (type: double) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -114,32 +93,47 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string), val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out index 3b347ef..bac15f6 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out @@ -51,122 +51,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out index dde0fe4..f6a47de 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out @@ -57,122 +57,108 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -206,122 +192,108 @@ EXPLAIN SELECT a.*, b.* FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -359,102 +331,91 @@ EXPLAIN SELECT a.key, count(1) FROM T1 a JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) - Reducer 3 <- Union 2 (GROUP, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 6 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -473,30 +434,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: ((key is not null and val is not null) and (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -523,102 +481,91 @@ POSTHOOK: query: EXPLAIN SELECT a.key, count(1) FROM T1 a LEFT OUTER JOIN T2 b ON a.key = b.key and a.val = b.val group by a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) - Reducer 3 <- Union 2 (GROUP, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8')) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 6 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), val (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -637,30 +584,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (not ((((key = '2') or (key = '7')) or (key = '3')) or (key = '8'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: string), val (type: string) - 1 key (type: string), val (type: string) - Local Work: - Map Reduce Local Work + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out index 49d9e99..4150c23 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out @@ -51,122 +51,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out index 2575359..5640384 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out @@ -51,122 +51,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out index 801129c..353e1b8 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out @@ -47,122 +47,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -197,122 +187,112 @@ EXPLAIN SELECT a.*, b.* FROM T2 a JOIN T1 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (key = '2')) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (key = '2'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out index 6bf9984..4fe594c 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out @@ -49,122 +49,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '2') or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not ((key = '2') or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out index b6052f4..fc1e81c 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out @@ -51,122 +51,112 @@ EXPLAIN SELECT a.*, b.* FROM T1 a JOIN T2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 5 Map Operator Tree: TableScan - alias: a + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {val} - 1 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 7 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out index f16750e..f76ebbd 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out @@ -67,135 +67,61 @@ EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and (((key = '2') or (key = '8')) or (key = '3'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 7 - Map Operator Tree: - TableScan - alias: a - Filter Operator - predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 5 - 2 Map 6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 Map Operator Tree: TableScan alias: b @@ -203,18 +129,13 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 6 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 8 Map Operator Tree: TableScan alias: c @@ -222,17 +143,68 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not (((key = '2') or (key = '8')) or (key = '3')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out index 5fd2665..466d70e 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out @@ -65,135 +65,61 @@ EXPLAIN SELECT a.*, b.*, c.* FROM T1 a JOIN T2 b ON a.key = b.key JOIN T3 c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2, Stage-3 - Stage-3 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Map 4 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Union 2 <- Map 1 (NONE, 0), Map 7 (NONE, 0) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 5 Map Operator Tree: TableScan - alias: a + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: (key is not null and ((key = '3') or (key = '8'))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 3 - 2 Map 4 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Map 7 - Map Operator Tree: - TableScan - alias: a - Filter Operator - predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 5 - 2 Map 6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Union 2 - Vertex: Union 2 - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 5 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 6 Map Operator Tree: TableScan alias: b @@ -201,18 +127,13 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {val} - 2 {key} {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work - Map 6 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 8 Map Operator Tree: TableScan alias: c @@ -220,17 +141,68 @@ STAGE PLANS: Filter Operator predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {val} - 1 {key} {val} - 2 {val} - keys: - 0 key (type: string) - 1 key (type: string) - 2 key (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not ((key = '3') or (key = '8')))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 3 + Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out index fbbe8fe..bc0b1f7 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out @@ -195,37 +195,15 @@ select key, count(1) as cnt from T1 group by key join T2 b on subq1.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 {val} - keys: - 0 _col0 (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 3) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -248,9 +226,21 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Reducer 2 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -258,30 +248,33 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {key} {val} - keys: - 0 _col0 (type: string) - 1 key (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out index 026f83e..d769ebe 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin9.q.out @@ -106,26 +106,54 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 - Local Work: - Map Reduce Local Work + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -239,26 +267,54 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 - Local Work: - Map Reduce Local Work + Map 3 + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:int + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -317,74 +373,67 @@ hive_test_smb_bucket2 b ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL POSTHOOK: type: CREATETABLE_AS_SELECT STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0 - Stage-2 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col6, _col7 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_mapjoin9_results - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col6, _col7 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: int), _col7 (type: string), '2010-10-15' (type: string), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_mapjoin9_results Stage: Stage-0 Move Operator @@ -392,7 +441,7 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-4 + Stage: Stage-3 Create Table Operator: Create Table columns: k1 int, value string, ds string, k2 int diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out index fbe3166..8d0527e 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out @@ -57,13 +57,14 @@ explain select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -74,21 +75,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -96,32 +89,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -146,32 +140,14 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -179,32 +155,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -234,13 +222,14 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -248,51 +237,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -407,13 +389,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -424,21 +407,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -446,32 +421,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -496,32 +472,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -529,32 +487,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -584,13 +554,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -598,51 +569,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out index cc511e4..2df87cf 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_10.q.out @@ -73,71 +73,63 @@ on (a.ds = '1' and b.ds = '2' and a.type = b.type) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {userid} {pageid} {postid} {type} - 1 - keys: - 0 userid (type: int), pageid (type: int), postid (type: int), type (type: string) - 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + sort order: ++++ + Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 3 Data size: 414 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((userid is not null and pageid is not null) and postid is not null) and type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {userid} {pageid} {postid} {type} - 1 {userid} {pageid} {postid} {type} - keys: - 0 userid (type: int), pageid (type: int), postid (type: int), type (type: string) - 1 userid (type: int), pageid (type: int), postid (type: int), type (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + sort order: ++++ + Map-reduce partition columns: userid (type: int), pageid (type: int), postid (type: int), type (type: string) + Statistics: Num rows: 1 Data size: 138 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {KEY.reducesinkkey2} {KEY.reducesinkkey3} + outputColumnNames: _col0, _col1, _col2, _col3, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string), '1' (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col11 (type: string), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out index 14cfa00..5637206 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out @@ -121,13 +121,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -140,16 +142,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {value} {key} - keys: - 0 key (type: int) - 1 value (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -203,14 +203,7 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -220,33 +213,14 @@ STAGE PLANS: isSamplingPred: false predicate: value is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} {key} - keys: - 0 key (type: int) - 1 value (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: value (type: int) + sort order: + + Map-reduce partition columns: value (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -300,6 +274,28 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -413,41 +409,41 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: UDFToDouble(value) is not null (type: boolean) + predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(value) (type: double) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int), value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table4 + base file name: test_table3 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -459,11 +455,11 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table4 + name default.test_table3 numFiles 16 numRows 0 rawDataSize 0 - serialization.ddl struct test_table4 { i32 key, string value} + serialization.ddl struct test_table3 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -481,70 +477,44 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table4 + name default.test_table3 numFiles 16 numRows 0 rawDataSize 0 - serialization.ddl struct test_table4 { i32 key, string value} + serialization.ddl struct test_table3 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table4 - name: default.test_table4 + name: default.test_table3 + name: default.test_table3 Truncated Path -> Alias: - /test_table4 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /test_table3 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: UDFToDouble(key) is not null (type: boolean) + predicate: UDFToDouble(value) is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(value) (type: double) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: UDFToDouble(value) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(value) (type: double) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: int), value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: test_table3 + base file name: test_table4 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -556,11 +526,11 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table3 + name default.test_table4 numFiles 16 numRows 0 rawDataSize 0 - serialization.ddl struct test_table3 { i32 key, string value} + serialization.ddl struct test_table4 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -578,21 +548,43 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.test_table3 + name default.test_table4 numFiles 16 numRows 0 rawDataSize 0 - serialization.ddl struct test_table3 { i32 key, string value} + serialization.ddl struct test_table4 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - name: default.test_table3 + name: default.test_table4 + name: default.test_table4 Truncated Path -> Alias: - /test_table3 [a] + /test_table4 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out index 451ef9e..3aed084 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_14.q.out @@ -51,72 +51,63 @@ select count(*) from ( ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -176,77 +167,68 @@ group by key order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 3) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 3) - Reducer 3 <- Reducer 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) @@ -258,7 +240,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint) @@ -327,77 +309,68 @@ select count(*) from ) subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 3) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 3) - Reducer 3 <- Reducer 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) @@ -415,7 +388,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -484,16 +457,18 @@ select /*+mapjoin(subq1)*/ count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -505,23 +480,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -533,31 +497,31 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -629,13 +593,15 @@ select /*+mapjoin(subq2)*/ count(*) from on subq2.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -650,23 +616,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -674,29 +629,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -794,16 +749,18 @@ select /*+mapjoin(subq2)*/ count(*) from on subq2.key = subq4.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -815,23 +772,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -843,31 +789,31 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -953,16 +899,18 @@ select /*+mapjoin(subq1)*/ count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -974,23 +922,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -1002,31 +939,31 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1088,16 +1025,18 @@ select /*+mapjoin(subq1)*/ count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -1109,23 +1048,12 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -1137,31 +1065,31 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1219,13 +1147,15 @@ select /*+mapjoin(subq1)*/ count(*) from join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1240,23 +1170,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -1264,29 +1183,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1342,13 +1261,15 @@ select /*+mapjoin(a)*/ count(*) from join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1363,23 +1284,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: a @@ -1387,29 +1297,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1475,16 +1385,18 @@ select /*+mapjoin(subq1, subq2)*/ count(*) from on (subq1.key = subq3.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: a @@ -1496,17 +1408,11 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -1519,25 +1425,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: a @@ -1549,35 +1442,33 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 - 1 - 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -1657,13 +1548,15 @@ join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1678,23 +1571,12 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -1702,29 +1584,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index 75191dd..6ed680d 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -93,13 +93,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,16 +114,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -175,14 +175,7 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -192,33 +185,14 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -272,6 +246,28 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -447,13 +443,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -466,16 +464,14 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {key2} {value} - keys: - 0 key (type: int), key2 (type: int) - 1 key (type: int), key2 (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -529,14 +525,7 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -546,33 +535,14 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {key2} {value} - 1 {key} {key2} {value} - keys: - 0 key (type: int), key2 (type: int) - 1 key (type: int), key2 (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -626,6 +596,28 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -749,13 +741,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -768,16 +762,14 @@ STAGE PLANS: isSamplingPred: false predicate: (key2 is not null and key is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {key2} {value} - keys: - 0 key2 (type: int), key (type: int) - 1 key2 (type: int), key (type: int) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key2 (type: int), key (type: int) + sort order: ++ + Map-reduce partition columns: key2 (type: int), key (type: int) + Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -831,14 +823,7 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -848,33 +833,14 @@ STAGE PLANS: isSamplingPred: false predicate: (key2 is not null and key is not null) (type: boolean) Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {key2} {value} - 1 {key} {key2} {value} - keys: - 0 key2 (type: int), key (type: int) - 1 key2 (type: int), key (type: int) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key2 (type: int), key (type: int) + sort order: ++ + Map-reduce partition columns: key2 (type: int), key (type: int) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -928,6 +894,28 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -1051,13 +1039,15 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1070,16 +1060,14 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key2} - 1 {key} {key2} {value} - keys: - 0 key (type: int), value (type: string) - 1 key (type: int), value (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: string) + Statistics: Num rows: 125 Data size: 1804 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key2 (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1133,14 +1121,7 @@ STAGE PLANS: name: default.test_table1 Truncated Path -> Alias: /test_table1 [a] - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Map 4 Map Operator Tree: TableScan alias: b @@ -1150,33 +1131,14 @@ STAGE PLANS: isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {key2} {value} - 1 {key} {key2} {value} - keys: - 0 key (type: int), value (type: string) - 1 key (type: int), value (type: string) - outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: string) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key2 (type: int) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1230,6 +1192,28 @@ STAGE PLANS: name: default.test_table2 Truncated Path -> Alias: /test_table2 [b] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {VALUE._col0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 137 Data size: 1984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out index 84ea5f2..a4fd7c3 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out @@ -43,13 +43,15 @@ EXPLAIN SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,23 +62,12 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -84,29 +75,29 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out index f487f60..268ae23 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out @@ -181,237 +181,138 @@ JOIN test_table6 f ON a.key = f.key JOIN test_table7 g ON a.key = g.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3), Map 9 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan - alias: d + alias: c Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan - alias: e + alias: d Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan - alias: f + alias: e Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Map 8 Map Operator Tree: TableScan - alias: g + alias: f Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map 9 Map Operator Tree: TableScan - alias: a + alias: g Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - Inner Join 0 to 4 - Inner Join 0 to 5 - Inner Join 0 to 6 - condition expressions: - 0 - 1 - 2 - 3 - 4 - 5 - 6 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - 3 key (type: int) - 4 key (type: int) - 5 key (type: int) - 6 key (type: int) - input vertices: - 1 Map 3 - 2 Map 4 - 3 Map 5 - 4 Map 6 - 5 Map 7 - 6 Map 8 - Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + Inner Join 0 to 4 + Inner Join 0 to 5 + Inner Join 0 to 6 + condition expressions: + 0 + 1 + 2 + 3 + 4 + 5 + 6 + Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out index f7c6d74..1cf144b 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out @@ -57,13 +57,14 @@ explain select /*+mapjoin(a)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -74,21 +75,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -96,32 +89,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -148,32 +142,14 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -181,32 +157,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -236,13 +224,14 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -250,51 +239,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -411,13 +393,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -428,21 +411,13 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b @@ -450,32 +425,33 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -502,32 +478,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -535,32 +493,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -590,13 +560,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_1 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -604,51 +575,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index d563c80..43433c8 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -53,39 +53,31 @@ POSTHOOK: query: explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 5 (type: int) - 1 5 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan alias: b @@ -93,106 +85,94 @@ STAGE PLANS: Filter Operator predicate: (key = 5) (type: boolean) Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 5 (type: int) - 1 5 (type: int) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 5 (type: int) - 1 5 (type: int) - input vertices: - 0 Map 3 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Map 7 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 5) (type: boolean) - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 5 (type: int) - 1 5 (type: int) - input vertices: - 1 Map 2 - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} - 1 {_col0} - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 = 5) (type: boolean) - Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), 5 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Statistics: Num rows: 27 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = 5) (type: boolean) + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out index d54e749..6b44d2c 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out @@ -61,35 +61,14 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -100,32 +79,47 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -152,32 +146,14 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -185,32 +161,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -239,13 +227,14 @@ POSTHOOK: query: explain select /*+mapjoin(a)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -253,51 +242,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -413,35 +395,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -452,32 +413,47 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 226 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -504,32 +480,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a left outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -537,32 +495,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -591,13 +561,14 @@ POSTHOOK: query: explain select /*+mapjoin(b)*/ * from smb_bucket_2 a right outer join smb_bucket_3 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -605,51 +576,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 0 Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out index a794a85..d07d65a 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out @@ -57,13 +57,14 @@ explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -74,79 +75,63 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -173,87 +158,72 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -280,87 +250,72 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -392,13 +347,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -406,73 +362,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -608,13 +548,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -622,73 +563,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -717,13 +642,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -731,73 +657,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -828,13 +738,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,b)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -842,73 +753,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out index 10506e8..607b1f0 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out @@ -57,13 +57,14 @@ explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -74,79 +75,63 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 3 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -173,87 +158,72 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -280,87 +250,72 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -392,13 +347,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a left outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -406,73 +362,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -608,13 +548,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -622,73 +563,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -717,13 +642,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key left outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -731,73 +657,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Left Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 2 Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -828,13 +738,14 @@ POSTHOOK: query: explain select /*+mapjoin(a,c)*/ * from smb_bucket_1 a right outer join smb_bucket_2 b on a.key = b.key right outer join smb_bucket_3 c on b.key=c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -842,73 +753,57 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan alias: c Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - Right Outer Join1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - 1 Map 2 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 222 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 457 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out index 07659a4..30746ff 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out @@ -63,73 +63,67 @@ insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -1237,73 +1231,67 @@ insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2646 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -2427,73 +2415,67 @@ insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -2533,73 +2515,67 @@ insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.smb_join_results - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.smb_join_results Stage: Stage-0 Move Operator @@ -2637,96 +2613,81 @@ POSTHOOK: query: explain select /*+mapjoin(b,c)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key join smb_bucket4_2 c on b.key = c.key where a.key>1000 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 4 Map Operator Tree: TableScan - alias: a + alias: c Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1000) (type: boolean) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {key} {value} - 1 {key} {value} - 2 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 1 Map 2 - 2 Map 3 - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col10 (type: int), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out index 21232c9..11dd631 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_1.q.out @@ -55,72 +55,63 @@ explain select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out index 3517be5..7e723d6 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_2.q.out @@ -63,72 +63,63 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out index 2e5f240..604ee48 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_3.q.out @@ -63,72 +63,63 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out index aa4cfd6..2365d7d 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_4.q.out @@ -61,72 +61,63 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key and a.value=b.value where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key is not null and value is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value (type: string) - 1 key (type: string), value (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 45 Data size: 478 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out index fa2f05e..6354ff6 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_5.q.out @@ -112,35 +112,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,13 +158,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 1 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -182,61 +181,35 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -255,13 +228,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 1 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -278,19 +251,40 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out index bd1b59f..9e480da 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_6.q.out @@ -112,35 +112,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,13 +158,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -175,67 +174,42 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -254,13 +228,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -270,26 +244,46 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_2/part=1 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out index bacdb48..5acdf70 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_7.q.out @@ -148,35 +148,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -195,13 +194,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -217,16 +216,16 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 #### A masked pattern was here #### Partition base file name: part=2 @@ -242,13 +241,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -264,62 +263,36 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_2 + name default.srcbucket_mapjoin_part_1 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_2 - name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_1 Truncated Path -> Alias: - /srcbucket_mapjoin_part_2/part=1 [b] - /srcbucket_mapjoin_part_2/part=2 [b] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + /srcbucket_mapjoin_part_1/part=1 [a] + /srcbucket_mapjoin_part_1/part=2 [a] + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: int) - 1 key (type: int) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -338,13 +311,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -360,16 +333,16 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 #### A masked pattern was here #### Partition base file name: part=2 @@ -385,13 +358,13 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 numFiles 2 numRows 500 partition_columns part partition_columns.types string rawDataSize 5312 - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -407,20 +380,41 @@ STAGE PLANS: columns.comments columns.types int:string #### A masked pattern was here #### - name default.srcbucket_mapjoin_part_1 + name default.srcbucket_mapjoin_part_2 partition_columns part partition_columns.types string - serialization.ddl struct srcbucket_mapjoin_part_1 { i32 key, string value} + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcbucket_mapjoin_part_1 - name: default.srcbucket_mapjoin_part_1 + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: - /srcbucket_mapjoin_part_1/part=1 [a] - /srcbucket_mapjoin_part_1/part=2 [a] + /srcbucket_mapjoin_part_2/part=1 [b] + /srcbucket_mapjoin_part_2/part=2 [b] Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out index 81e4f59..0cae793 100644 --- ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out +++ ql/src/test/results/clientpositive/spark/sort_merge_join_desc_8.q.out @@ -108,72 +108,63 @@ select /*+ mapjoin(b) */ count(*) from table_desc1 a join table_desc2 b on a.key=b.key where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 4 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 10) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial @@ -223,37 +214,15 @@ select /*+ mapjoin(b) */ count(*) from table_desc3 a join table_desc4 b on a.key=b.key and a.value2=b.value2 where a.key < 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key is not null and value2 is not null) and (key < 10)) (type: boolean) - Statistics: Num rows: 41 Data size: 517 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value2 (type: string) - 1 key (type: string), value2 (type: string) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -264,31 +233,44 @@ STAGE PLANS: Filter Operator predicate: ((key is not null and value2 is not null) and (key < 10)) (type: boolean) Statistics: Num rows: 41 Data size: 837 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - keys: - 0 key (type: string), value2 (type: string) - 1 key (type: string), value2 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string), value2 (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value2 (type: string) + Statistics: Num rows: 41 Data size: 837 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 6312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key is not null and value2 is not null) and (key < 10)) (type: boolean) + Statistics: Num rows: 41 Data size: 517 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value2 (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value2 (type: string) + Statistics: Num rows: 41 Data size: 517 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 45 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/subquery_exists.q.out ql/src/test/results/clientpositive/spark/subquery_exists.q.out index 5f2d6f2..58be4f5 100644 --- ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -23,16 +23,30 @@ where exists ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: value (type: string), key (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan alias: a @@ -49,54 +63,32 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 value (type: string), key (type: string) - 1 _col0 (type: string), _col1 (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 value (type: string), key (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 1049fd0..fc7a338 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -61,21 +61,43 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-4 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-2 Spark Edges: - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 9 <- Map 8 (GROUP, 1) + Reducer 4 <- Reducer 3 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 10 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: a @@ -92,16 +114,12 @@ STAGE PLANS: mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map 7 Map Operator Tree: TableScan alias: s1 @@ -113,16 +131,12 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Local Work: - Map Reduce Local Work - Map 5 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Map 8 Map Operator Tree: TableScan alias: s1 @@ -141,9 +155,77 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 - Local Work: - Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Reducer 9 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -162,100 +244,9 @@ STAGE PLANS: mode: hash outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 - keys: - 0 - 1 - - Stage: Stage-2 - Spark - Edges: - Reducer 2 <- Map 1 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - input vertices: - 1 Reducer 6 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {_col0} - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col5 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Local Work: - Map Reduce Local Work - Reducer 2 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_5 + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-1 Move Operator @@ -319,7 +310,6 @@ POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:strin POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-5:MAPRED RUN: Stage-2:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE diff --git ql/src/test/results/clientpositive/spark/temp_table_join1.q.out ql/src/test/results/clientpositive/spark/temp_table_join1.q.out index b4277d6..f152473 100644 --- ql/src/test/results/clientpositive/spark/temp_table_join1.q.out +++ ql/src/test/results/clientpositive/spark/temp_table_join1.q.out @@ -33,71 +33,64 @@ FROM src_nontemp src1 JOIN src_nontemp src2 ON (src1.key = src2.key) SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -136,71 +129,64 @@ FROM src_nontemp src1 JOIN src_temp src2 ON (src1.key = src2.key) SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -241,71 +227,64 @@ FROM src_temp src1 JOIN src_temp src2 ON (src1.key = src2.key) SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: src2 + alias: src1 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: src1 + alias: src2 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col6 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 57 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/tez_join_tests.q.out ql/src/test/results/clientpositive/spark/tez_join_tests.q.out index 15a6751..a27abba 100644 --- ql/src/test/results/clientpositive/spark/tez_join_tests.q.out +++ ql/src/test/results/clientpositive/spark/tez_join_tests.q.out @@ -9,35 +9,17 @@ explain select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -45,77 +27,79 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reducer 2 - Local Work: - Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {key} {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - - Stage: Stage-1 - Spark - Edges: - Reducer 5 <- Map 4 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {key} {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 5 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out index 2e28b02..a03e9a1 100644 --- ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out +++ ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out @@ -9,35 +9,17 @@ explain select * from (select b.key, b.value from src1 a left outer join src b on (a.key = b.key) order by b.key) x right outer join src c on (x.value = c.value) order by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {value} - keys: - 0 key (type: string) - 1 key (type: string) - Local Work: - Map Reduce Local Work - - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (SORT, 1) + Reducer 5 <- Reducer 4 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -45,77 +27,79 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 - 1 {key} {value} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col5, _col6 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reducer 2 - Local Work: - Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col5, _col6 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} - 1 {key} {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - - Stage: Stage-1 - Spark - Edges: - Reducer 5 <- Map 4 (SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {_col0} {_col1} - 1 {key} {value} - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) Reducer 5 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out index 71052f6..c357536 100644 --- ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_bucketmapjoin1.q.out @@ -101,72 +101,67 @@ POSTHOOK: query: explain select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: b + alias: a Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan - alias: a + alias: b Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -195,35 +190,14 @@ POSTHOOK: query: explain select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -234,33 +208,48 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -299,35 +288,14 @@ explain select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {key} {value} - 1 {value} - keys: - 0 key (type: int) - 1 key (type: int) - Local Work: - Map Reduce Local Work - Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -338,33 +306,48 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {key} {value} - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 5a29f76..37bf3b7 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -506,35 +506,36 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: p2 + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -583,49 +584,25 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p2] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + /part_orc [p1] + Execution mode: vectorized + Map 5 Map Operator Tree: TableScan - alias: p1 + alias: p2 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {p_partkey} {p_name} {p_mfgr} {p_brand} {p_type} {p_size} {p_container} {p_retailprice} {p_comment} - 1 - keys: - 0 p_partkey (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - input vertices: - 1 Map 4 - Position of Big Table: 0 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -674,9 +651,28 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] + /part_orc [p2] Execution mode: vectorized Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false + Reducer 3 Needs Tagging: false Reduce Operator Tree: Extract @@ -695,7 +691,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) auto parallelism: false - Reducer 3 + Reducer 4 Needs Tagging: false Reduce Operator Tree: Extract @@ -1934,35 +1930,31 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: p1 + alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} - 1 - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2011,28 +2003,24 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + /part_orc [part_orc] + Map 4 Map Operator Tree: TableScan - alias: part_orc + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2081,10 +2069,9 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [part_orc] + /part_orc [p1] + Execution mode: vectorized Reducer 2 - Local Work: - Map Reduce Local Work Needs Tagging: false Reduce Operator Tree: Extract @@ -2099,45 +2086,50 @@ STAGE PLANS: isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} - 1 - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int:string:string:string:string:int:string:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -2251,31 +2243,34 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Reducer 4 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan - alias: part_orc + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2324,88 +2319,22 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [part_orc] - Reducer 3 - Local Work: - Map Reduce Local Work - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 - 1 {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} - keys: - 0 p_partkey (type: int) - 1 _col0 (type: int) - Position of Big Table: 0 - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /part_orc [p1] + Execution mode: vectorized + Map 3 Map Operator Tree: TableScan - alias: p1 + alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} {_col7} {_col8} - keys: - 0 p_partkey (type: int) - 1 _col0 (type: int) - outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - input vertices: - 1 Reducer 3 - Position of Big Table: 0 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 - columns.types int:string:string:string:string:int:string:double:string - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2454,8 +2383,66 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] - Execution mode: vectorized + /part_orc [part_orc] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8 + columns.types int:string:string:string:string:int:string:double:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Stage: Stage-0 Fetch Operator @@ -4136,35 +4123,32 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 3), Reducer 2 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan - alias: p1 + alias: part_orc Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {_col1} {_col2} {_col5} {_col7} - 1 - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: p_mfgr (type: string), p_name (type: string) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4213,29 +4197,24 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [p1] - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3) -#### A masked pattern was here #### - Vertices: - Map 1 + /part_orc [part_orc] + Map 5 Map Operator Tree: TableScan - alias: part_orc + alias: p1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Reduce Output Operator - key expressions: p_mfgr (type: string), p_name (type: string) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) - auto parallelism: false + Filter Operator + isSamplingPred: false + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_partkey (type: int) + sort order: + + Map-reduce partition columns: p_partkey (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4284,10 +4263,9 @@ STAGE PLANS: name: default.part_orc name: default.part_orc Truncated Path -> Alias: - /part_orc [part_orc] + /part_orc [p1] + Execution mode: vectorized Reducer 2 - Local Work: - Map Reduce Local Work Needs Tagging: false Reduce Operator Tree: Extract @@ -4302,29 +4280,34 @@ STAGE PLANS: isSamplingPred: false predicate: _col0 is not null (type: boolean) Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} {_col2} {_col5} {_col7} - 1 - keys: - 0 _col0 (type: int) - 1 p_partkey (type: int) - outputColumnNames: _col1, _col2, _col5, _col7 - input vertices: - 1 Map 4 - Position of Big Table: 0 - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col4} {VALUE._col6} + 1 + outputColumnNames: _col1, _col2, _col5, _col7 + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 14 Data size: 8823 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Extract diff --git ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index 1bafba5..e251572 100644 --- ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -7,74 +7,66 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG( JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - condition expressions: - 0 {cint} - 1 - keys: - 0 cint (type: int) - 1 cint (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 + Reduce Output Operator + key expressions: cint (type: int) + sort order: + + Map-reduce partition columns: cint (type: int) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 4 Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {cint} - 1 {cint} - keys: - 0 cint (type: int) - 1 cint (type: int) - outputColumnNames: _col2, _col17 - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: cint (type: int) + sort order: + + Map-reduce partition columns: cint (type: int) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col2, _col17 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/windowing.q.out ql/src/test/results/clientpositive/spark/windowing.q.out index ceaf40d..a2fe80f 100644 --- ql/src/test/results/clientpositive/spark/windowing.q.out +++ ql/src/test/results/clientpositive/spark/windowing.q.out @@ -64,26 +64,26 @@ group by p_mfgr, p_name, p_size POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 -Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 -Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 -Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 -Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 -Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 -Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 @@ -111,26 +111,26 @@ having p_size > 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 -Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 -Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 -Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 -Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 -Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 -Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 @@ -150,27 +150,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 1 +Manufacturer#3 almond antique forest lavender goldenrod 2 +Manufacturer#3 almond antique metallic orange dim 3 +Manufacturer#3 almond antique misty red olive 4 +Manufacturer#3 almond antique olive coral navajo 5 Manufacturer#1 almond antique burnished rose metallic 2 Manufacturer#1 almond antique burnished rose metallic 2 Manufacturer#1 almond antique chartreuse lavender yellow 3 Manufacturer#1 almond antique salmon chartreuse burlywood 4 Manufacturer#1 almond aquamarine burnished black steel 5 Manufacturer#1 almond aquamarine pink moccasin thistle 6 -Manufacturer#2 almond antique violet chocolate turquoise 1 -Manufacturer#2 almond antique violet turquoise frosted 2 -Manufacturer#2 almond aquamarine midnight light salmon 3 -Manufacturer#2 almond aquamarine rose maroon antique 4 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 -Manufacturer#3 almond antique chartreuse khaki white 1 -Manufacturer#3 almond antique forest lavender goldenrod 2 -Manufacturer#3 almond antique metallic orange dim 3 -Manufacturer#3 almond antique misty red olive 4 -Manufacturer#3 almond antique olive coral navajo 5 Manufacturer#4 almond antique gainsboro frosted violet 1 Manufacturer#4 almond antique violet mint lemon 2 Manufacturer#4 almond aquamarine floral ivory bisque 3 Manufacturer#4 almond aquamarine yellow dodger mint 4 Manufacturer#4 almond azure aquamarine papaya violet 5 +Manufacturer#2 almond antique violet chocolate turquoise 1 +Manufacturer#2 almond antique violet turquoise frosted 2 +Manufacturer#2 almond aquamarine midnight light salmon 3 +Manufacturer#2 almond aquamarine rose maroon antique 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 Manufacturer#5 almond antique blue firebrick mint 1 Manufacturer#5 almond antique medium spring khaki 2 Manufacturer#5 almond antique sky peru orange 3 @@ -198,27 +198,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44 Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 1173.15 2 0 Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 2346.3 2 0 Manufacturer#1 almond antique chartreuse lavender yellow 3 2 3 1753.76 4100.06 34 32 Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 4 1602.59 5702.650000000001 6 -28 Manufacturer#1 almond aquamarine burnished black steel 5 4 5 1414.42 7117.070000000001 28 22 Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 6 1632.66 8749.730000000001 42 14 -Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0 -Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26 -Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.360000000001 2 -38 -Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7 -Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0 -Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3 -Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5 -Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18 -Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44 Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0 Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29 Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12 Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20 Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.620000000001 12 5 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.360000000001 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7 Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0 Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.3500000000004 6 -25 Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 @@ -250,27 +250,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +1 1 1 1671.68 0 +2 2 2 2861.95 -3 +3 3 3 4272.34 5 +4 4 4 6195.32 -18 +5 5 5 7532.61 44 1 1 2 1173.15 0 1 1 2 2346.3 0 3 2 3 4100.06 32 4 3 4 5702.650000000001 -28 5 4 5 7117.070000000001 22 6 5 6 8749.730000000001 14 -1 1 1 1690.68 0 -2 2 2 3491.38 26 -3 3 3 5523.360000000001 -38 -4 4 4 7222.02 23 -5 5 5 8923.62 -7 -1 1 1 1671.68 0 -2 2 2 2861.95 -3 -3 3 3 4272.34 5 -4 4 4 6195.32 -18 -5 5 5 7532.61 44 1 1 1 1620.67 0 2 2 2 2996.09 29 3 3 3 4202.35 -12 4 4 4 6047.27 -20 5 5 5 7337.620000000001 5 +1 1 1 1690.68 0 +2 2 2 3491.38 26 +3 3 3 5523.360000000001 -38 +4 4 4 7222.02 23 +5 5 5 8923.62 -7 1 1 1 1789.69 0 2 2 2 3401.3500000000004 -25 3 3 3 5190.08 -4 @@ -302,6 +302,11 @@ order by p_name POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 1337.29 7532.61 45 44 Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 1173.15 2 0 Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 2346.3 2 0 Manufacturer#1 almond antique burnished rose metallic 1 1 1173.15 3519.4500000000003 2 0 @@ -310,21 +315,16 @@ Manufacturer#1 almond antique chartreuse lavender yellow 5 2 1753.76 6446.360000 Manufacturer#1 almond antique salmon chartreuse burlywood 6 3 1602.59 8048.950000000001 6 -28 Manufacturer#1 almond aquamarine burnished black steel 7 4 1414.42 9463.37 28 22 Manufacturer#1 almond aquamarine pink moccasin thistle 8 5 1632.66 11096.03 42 14 -Manufacturer#2 almond antique violet chocolate turquoise 1 1 1690.68 1690.68 14 0 -Manufacturer#2 almond antique violet turquoise frosted 2 2 1800.7 3491.38 40 26 -Manufacturer#2 almond aquamarine midnight light salmon 3 3 2031.98 5523.360000000001 2 -38 -Manufacturer#2 almond aquamarine rose maroon antique 4 4 1698.66 7222.02 25 23 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 1701.6 8923.62 18 -7 -Manufacturer#3 almond antique chartreuse khaki white 1 1 1671.68 1671.68 17 0 -Manufacturer#3 almond antique forest lavender goldenrod 2 2 1190.27 2861.95 14 -3 -Manufacturer#3 almond antique metallic orange dim 3 3 1410.39 4272.34 19 5 -Manufacturer#3 almond antique misty red olive 4 4 1922.98 6195.32 1 -18 -Manufacturer#3 almond antique olive coral navajo 5 5 1337.29 7532.61 45 44 Manufacturer#4 almond antique gainsboro frosted violet 1 1 1620.67 1620.67 10 0 Manufacturer#4 almond antique violet mint lemon 2 2 1375.42 2996.09 39 29 Manufacturer#4 almond aquamarine floral ivory bisque 3 3 1206.26 4202.35 27 -12 Manufacturer#4 almond aquamarine yellow dodger mint 4 4 1844.92 6047.27 7 -20 Manufacturer#4 almond azure aquamarine papaya violet 5 5 1290.35 7337.620000000001 12 5 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 2031.98 5523.360000000001 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 1701.6 8923.62 18 -7 Manufacturer#5 almond antique blue firebrick mint 1 1 1789.69 1789.69 31 0 Manufacturer#5 almond antique medium spring khaki 2 2 1611.66 3401.3500000000004 6 -25 Manufacturer#5 almond antique sky peru orange 3 3 1788.73 5190.08 2 -4 @@ -344,27 +344,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 Manufacturer#1 almond antique burnished rose metallic 2 1 Manufacturer#1 almond antique burnished rose metallic 2 1 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 Manufacturer#1 almond aquamarine burnished black steel 28 5 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 -Manufacturer#2 almond antique violet turquoise frosted 40 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 -Manufacturer#3 almond antique chartreuse khaki white 17 1 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 -Manufacturer#3 almond antique metallic orange dim 19 3 -Manufacturer#3 almond antique misty red olive 1 4 -Manufacturer#3 almond antique olive coral navajo 45 5 Manufacturer#4 almond antique gainsboro frosted violet 10 1 Manufacturer#4 almond antique violet mint lemon 39 2 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 Manufacturer#5 almond antique blue firebrick mint 31 1 Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 @@ -388,27 +388,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 -Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 -Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 -Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 @@ -432,27 +432,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 -Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 -Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 -Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 @@ -478,27 +478,27 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 19 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 17 1 +Manufacturer#3 almond antique metallic orange dim 19 19 17 45 +Manufacturer#3 almond antique misty red olive 1 1 14 45 +Manufacturer#3 almond antique olive coral navajo 45 45 19 45 Manufacturer#1 almond antique burnished rose metallic 2 2 2 34 Manufacturer#1 almond antique burnished rose metallic 2 2 2 6 Manufacturer#1 almond antique chartreuse lavender yellow 34 34 2 28 Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 2 42 Manufacturer#1 almond aquamarine burnished black steel 28 28 34 42 Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 42 -Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 2 -Manufacturer#2 almond antique violet turquoise frosted 40 40 14 25 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 14 18 -Manufacturer#2 almond aquamarine rose maroon antique 25 25 40 18 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 2 18 -Manufacturer#3 almond antique chartreuse khaki white 17 17 17 19 -Manufacturer#3 almond antique forest lavender goldenrod 14 14 17 1 -Manufacturer#3 almond antique metallic orange dim 19 19 17 45 -Manufacturer#3 almond antique misty red olive 1 1 14 45 -Manufacturer#3 almond antique olive coral navajo 45 45 19 45 Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 27 Manufacturer#4 almond antique violet mint lemon 39 39 10 7 Manufacturer#4 almond aquamarine floral ivory bisque 27 27 10 12 Manufacturer#4 almond aquamarine yellow dodger mint 7 7 39 12 Manufacturer#4 almond azure aquamarine papaya violet 12 12 27 12 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 2 +Manufacturer#2 almond antique violet turquoise frosted 40 40 14 25 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 14 18 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 40 18 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 2 18 Manufacturer#5 almond antique blue firebrick mint 31 31 31 2 Manufacturer#5 almond antique medium spring khaki 6 6 31 46 Manufacturer#5 almond antique sky peru orange 2 2 31 23 @@ -528,11 +528,11 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#3 almond antique chartreuse khaki white 17 1 17 17 19 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 14 17 1 -Manufacturer#3 almond antique metallic orange dim 19 3 19 17 45 -Manufacturer#3 almond antique misty red olive 1 4 1 14 45 -Manufacturer#3 almond antique olive coral navajo 45 5 45 19 45 +Manufacturer#3 almond antique chartreuse khaki white 17 1 17 17 14 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 14 17 14 +Manufacturer#3 almond antique metallic orange dim 19 1 19 19 1 +Manufacturer#3 almond antique misty red olive 1 2 1 19 1 +Manufacturer#3 almond antique olive coral navajo 45 1 45 45 45 PREHOOK: query: -- 13. testSumWindow select p_mfgr,p_name, p_size, sum(p_size) over w1 as s1, @@ -551,27 +551,27 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 50 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 14 +Manufacturer#3 almond antique metallic orange dim 19 96 19 +Manufacturer#3 almond antique misty red olive 1 79 1 +Manufacturer#3 almond antique olive coral navajo 45 65 45 Manufacturer#1 almond antique burnished rose metallic 2 38 2 Manufacturer#1 almond antique burnished rose metallic 2 44 2 Manufacturer#1 almond antique chartreuse lavender yellow 34 72 34 Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 6 Manufacturer#1 almond aquamarine burnished black steel 28 110 28 Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42 -Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 -Manufacturer#2 almond antique violet turquoise frosted 40 81 40 -Manufacturer#2 almond aquamarine midnight light salmon 2 99 2 -Manufacturer#2 almond aquamarine rose maroon antique 25 85 25 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 18 -Manufacturer#3 almond antique chartreuse khaki white 17 50 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 51 14 -Manufacturer#3 almond antique metallic orange dim 19 96 19 -Manufacturer#3 almond antique misty red olive 1 79 1 -Manufacturer#3 almond antique olive coral navajo 45 65 45 Manufacturer#4 almond antique gainsboro frosted violet 10 76 10 Manufacturer#4 almond antique violet mint lemon 39 83 39 Manufacturer#4 almond aquamarine floral ivory bisque 27 95 27 Manufacturer#4 almond aquamarine yellow dodger mint 7 85 7 Manufacturer#4 almond azure aquamarine papaya violet 12 46 12 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 +Manufacturer#2 almond antique violet turquoise frosted 40 81 40 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 2 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 18 Manufacturer#5 almond antique blue firebrick mint 31 39 31 Manufacturer#5 almond antique medium spring khaki 6 85 6 Manufacturer#5 almond antique sky peru orange 2 108 2 @@ -593,27 +593,27 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 +Manufacturer#3 almond antique metallic orange dim 19 3 3 +Manufacturer#3 almond antique misty red olive 1 4 4 +Manufacturer#3 almond antique olive coral navajo 45 5 5 Manufacturer#1 almond antique burnished rose metallic 2 1 1 Manufacturer#1 almond antique burnished rose metallic 2 1 1 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 Manufacturer#1 almond aquamarine burnished black steel 28 5 4 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 -Manufacturer#3 almond antique metallic orange dim 19 3 3 -Manufacturer#3 almond antique misty red olive 1 4 4 -Manufacturer#3 almond antique olive coral navajo 45 5 5 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 Manufacturer#4 almond antique violet mint lemon 39 2 2 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 Manufacturer#5 almond antique blue firebrick mint 31 1 1 Manufacturer#5 almond antique medium spring khaki 6 2 2 Manufacturer#5 almond antique sky peru orange 2 3 3 @@ -655,28 +655,28 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 0.0 1 1 17.0 0.0 2 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 0.25 1 2 15.5 1.5 2 14 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 0.5 2 3 16.666666666666668 2.0548046676563256 2 19 17 +Manufacturer#3 almond antique misty red olive 1 4 4 0.8 0.75 2 4 12.75 7.013380069552769 2 1 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 1.0 3 5 19.2 14.344336861632886 2 45 19 Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2 Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 0.4 2 3 12.666666666666666 15.084944665313014 2 34 2 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 0.6 2 4 11.0 13.379088160259652 2 6 2 Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 0.8 3 5 14.4 13.763720427268202 2 28 34 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 1.0 3 6 19.0 16.237815945091466 2 42 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 0.0 1 1 14.0 0.0 4 14 14 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 0.25 1 2 27.0 13.0 4 40 14 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 0.5 2 3 18.666666666666668 15.86050300449376 4 2 14 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 0.75 2 4 20.25 14.00669482783144 4 25 40 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 1.0 3 5 19.8 12.560254774486067 4 18 2 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 0.0 1 1 17.0 0.0 2 17 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 0.25 1 2 15.5 1.5 2 14 17 -Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 0.5 2 3 16.666666666666668 2.0548046676563256 2 19 17 -Manufacturer#3 almond antique misty red olive 1 4 4 0.8 0.75 2 4 12.75 7.013380069552769 2 1 14 -Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 1.0 3 5 19.2 14.344336861632886 2 45 19 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 0.0 1 1 10.0 0.0 0 10 10 Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 0.25 1 2 24.5 14.5 0 39 10 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 0.5 2 3 25.333333333333332 11.897712198383164 0 27 10 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 0.75 2 4 20.75 13.007209539328564 0 7 39 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 1.0 3 5 19.0 12.149074038789951 0 12 27 -Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 0.0 1 1 31.0 0.0 1 31 31 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 0.0 1 1 14.0 0.0 4 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 0.25 1 2 27.0 13.0 4 40 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 0.5 2 3 18.666666666666668 15.86050300449376 4 2 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 0.75 2 4 20.25 14.00669482783144 4 25 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 1.0 3 5 19.8 12.560254774486067 4 18 2 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 0.0 1 1 31.0 0.0 1 31 31 Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 0.25 1 2 18.5 12.5 1 6 31 Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 0.5 2 3 13.0 12.832251036613439 1 2 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0.8 0.75 2 4 21.25 18.102140757380052 1 46 6 @@ -707,27 +707,27 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 17 31 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 31 14 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 50 50 17 +Manufacturer#3 almond antique misty red olive 1 4 4 0.8 51 1 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 96 45 19 Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2 Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 38 34 2 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 44 10 2 Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 72 28 34 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 114 42 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 14 14 14 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 54 40 14 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 56 2 14 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 81 25 40 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 99 32 2 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 17 31 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 31 14 17 -Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 50 50 17 -Manufacturer#3 almond antique misty red olive 1 4 4 0.8 51 1 14 -Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 96 45 19 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 10 17 10 Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 49 39 10 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 76 27 10 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 83 7 39 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 95 29 27 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 54 40 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 56 2 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 81 25 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 99 32 2 Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 31 31 31 Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 37 8 31 Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 39 2 31 @@ -753,27 +753,27 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 17 +Manufacturer#3 almond antique misty red olive 1 4 4 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 19 Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 3 2 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 4 2 Manufacturer#1 almond aquamarine burnished black steel 28 5 5 34 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 6 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17 -Manufacturer#3 almond antique metallic orange dim 19 3 3 17 -Manufacturer#3 almond antique misty red olive 1 4 4 14 -Manufacturer#3 almond antique olive coral navajo 45 5 5 19 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10 Manufacturer#4 almond antique violet mint lemon 39 2 2 10 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 10 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 39 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 27 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2 Manufacturer#5 almond antique blue firebrick mint 31 1 1 31 Manufacturer#5 almond antique medium spring khaki 6 2 2 31 Manufacturer#5 almond antique sky peru orange 2 3 3 31 @@ -801,27 +801,27 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 4272.34 1190.27 1671.68 1424.1133333333335 +Manufacturer#3 almond antique forest lavender goldenrod 14 6195.32 1190.27 1922.98 1548.83 +Manufacturer#3 almond antique metallic orange dim 19 7532.61 1190.27 1922.98 1506.522 +Manufacturer#3 almond antique misty red olive 1 5860.929999999999 1190.27 1922.98 1465.2324999999998 +Manufacturer#3 almond antique olive coral navajo 45 4670.66 1337.29 1922.98 1556.8866666666665 Manufacturer#1 almond antique burnished rose metallic 2 4100.06 1173.15 1753.76 1366.6866666666667 Manufacturer#1 almond antique burnished rose metallic 2 5702.650000000001 1173.15 1753.76 1425.6625000000001 Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.070000000001 1173.15 1753.76 1423.4140000000002 Manufacturer#1 almond antique salmon chartreuse burlywood 6 7576.580000000002 1173.15 1753.76 1515.3160000000003 Manufacturer#1 almond aquamarine burnished black steel 28 6403.430000000001 1414.42 1753.76 1600.8575000000003 Manufacturer#1 almond aquamarine pink moccasin thistle 42 4649.670000000001 1414.42 1632.66 1549.8900000000003 -Manufacturer#2 almond antique violet chocolate turquoise 14 5523.360000000001 1690.68 2031.98 1841.1200000000001 -Manufacturer#2 almond antique violet turquoise frosted 40 7222.02 1690.68 2031.98 1805.505 -Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 1690.68 2031.98 1784.7240000000002 -Manufacturer#2 almond aquamarine rose maroon antique 25 7232.9400000000005 1698.66 2031.98 1808.2350000000001 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5432.240000000001 1698.66 2031.98 1810.746666666667 -Manufacturer#3 almond antique chartreuse khaki white 17 4272.34 1190.27 1671.68 1424.1133333333335 -Manufacturer#3 almond antique forest lavender goldenrod 14 6195.32 1190.27 1922.98 1548.83 -Manufacturer#3 almond antique metallic orange dim 19 7532.61 1190.27 1922.98 1506.522 -Manufacturer#3 almond antique misty red olive 1 5860.929999999999 1190.27 1922.98 1465.2324999999998 -Manufacturer#3 almond antique olive coral navajo 45 4670.66 1337.29 1922.98 1556.8866666666665 Manufacturer#4 almond antique gainsboro frosted violet 10 4202.35 1206.26 1620.67 1400.7833333333335 Manufacturer#4 almond antique violet mint lemon 39 6047.27 1206.26 1844.92 1511.8175 Manufacturer#4 almond aquamarine floral ivory bisque 27 7337.620000000001 1206.26 1844.92 1467.5240000000001 Manufacturer#4 almond aquamarine yellow dodger mint 7 5716.950000000001 1206.26 1844.92 1429.2375000000002 Manufacturer#4 almond azure aquamarine papaya violet 12 4341.530000000001 1206.26 1844.92 1447.176666666667 +Manufacturer#2 almond antique violet chocolate turquoise 14 5523.360000000001 1690.68 2031.98 1841.1200000000001 +Manufacturer#2 almond antique violet turquoise frosted 40 7222.02 1690.68 2031.98 1805.505 +Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 1690.68 2031.98 1784.7240000000002 +Manufacturer#2 almond aquamarine rose maroon antique 25 7232.9400000000005 1698.66 2031.98 1808.2350000000001 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5432.240000000001 1698.66 2031.98 1810.746666666667 Manufacturer#5 almond antique blue firebrick mint 31 5190.08 1611.66 1789.69 1730.0266666666666 Manufacturer#5 almond antique medium spring khaki 6 6208.18 1018.1 1789.69 1552.045 Manufacturer#5 almond antique sky peru orange 2 7672.66 1018.1 1789.69 1534.532 @@ -851,26 +851,26 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.8333333333333 -Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.316 -Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.8575 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89 -Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.360000000001 1690.68 1690.68 1841.1200000000001 -Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.505 -Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.7240000000002 -Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.9400000000005 1698.66 1698.66 1808.2350000000001 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.240000000001 1701.6 1701.6 1810.746666666667 Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 4272.34 1671.68 1671.68 1424.1133333333335 Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 6195.32 1190.27 1190.27 1548.83 Manufacturer#3 almond antique metallic orange dim 19 1410.39 7532.61 1410.39 1410.39 1506.522 Manufacturer#3 almond antique misty red olive 1 1922.98 5860.929999999999 1922.98 1922.98 1465.2324999999998 Manufacturer#3 almond antique olive coral navajo 45 1337.29 4670.66 1337.29 1337.29 1556.8866666666665 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.8333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.316 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.8575 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89 Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 4202.35 1620.67 1620.67 1400.7833333333335 Manufacturer#4 almond antique violet mint lemon 39 1375.42 6047.27 1375.42 1375.42 1511.8175 Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 7337.620000000001 1206.26 1206.26 1467.5240000000001 Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 5716.950000000001 1844.92 1844.92 1429.2375000000002 Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 4341.530000000001 1290.35 1290.35 1447.176666666667 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.360000000001 1690.68 1690.68 1841.1200000000001 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.505 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.7240000000002 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.9400000000005 1698.66 1698.66 1808.2350000000001 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.240000000001 1701.6 1701.6 1810.746666666667 Manufacturer#5 almond antique blue firebrick mint 31 1789.69 5190.08 1789.69 1789.69 1730.0266666666666 Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611.66 1552.045 Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.532 @@ -902,27 +902,27 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 196.7742266885805 196.7742266885805 [17,19,14] 38720.09628888887 0.5557168646224995 224.6944444444446 +Manufacturer#3 almond antique forest lavender goldenrod 14 275.14144189852607 275.14144189852607 [17,1,19,14] 75702.81305 -0.6720833036576083 -1296.9000000000003 +Manufacturer#3 almond antique metallic orange dim 19 260.23473614412046 260.23473614412046 [17,1,19,14,45] 67722.117896 -0.5703526513979519 -2129.0664 +Manufacturer#3 almond antique misty red olive 1 275.9139962356932 275.9139962356932 [1,19,14,45] 76128.53331875012 -0.577476899644802 -2547.7868749999993 +Manufacturer#3 almond antique olive coral navajo 45 260.5815918713796 260.5815918713796 [1,19,45] 67902.76602222225 -0.8710736366736884 -4099.731111111111 Manufacturer#1 almond antique burnished rose metallic 2 273.70217881648074 273.70217881648074 [34,2] 74912.8826888888 1.0 4128.782222222221 Manufacturer#1 almond antique burnished rose metallic 2 258.10677784349235 258.10677784349235 [34,2,6] 66619.10876874991 0.811328754177887 2801.7074999999995 Manufacturer#1 almond antique chartreuse lavender yellow 34 230.90151585470358 230.90151585470358 [34,2,6,28] 53315.51002399992 0.695639377397664 2210.7864 Manufacturer#1 almond antique salmon chartreuse burlywood 6 202.73109328368946 202.73109328368946 [34,2,6,42,28] 41099.896184 0.630785977101214 2009.9536000000007 Manufacturer#1 almond aquamarine burnished black steel 28 121.6064517973862 121.6064517973862 [34,6,42,28] 14788.129118750014 0.2036684720435979 331.1337500000004 Manufacturer#1 almond aquamarine pink moccasin thistle 42 96.5751586416853 96.5751586416853 [6,42,28] 9326.761266666683 -1.4442181184933883E-4 -0.20666666666708502 -Manufacturer#2 almond antique violet chocolate turquoise 14 142.2363169751898 142.2363169751898 [2,40,14] 20231.169866666663 -0.49369526554523185 -1113.7466666666658 -Manufacturer#2 almond antique violet turquoise frosted 40 137.76306498840682 137.76306498840682 [2,25,40,14] 18978.662075 -0.5205630897335946 -1004.4812499999995 -Manufacturer#2 almond aquamarine midnight light salmon 2 130.03972279269132 130.03972279269132 [2,18,25,40,14] 16910.329504000005 -0.46908967495720255 -766.1791999999995 -Manufacturer#2 almond aquamarine rose maroon antique 25 135.55100986344584 135.55100986344584 [2,18,25,40] 18374.07627499999 -0.6091405874714462 -1128.1787499999987 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 156.44019460768044 156.44019460768044 [2,18,25] 24473.534488888927 -0.9571686373491608 -1441.4466666666676 -Manufacturer#3 almond antique chartreuse khaki white 17 196.7742266885805 196.7742266885805 [17,19,14] 38720.09628888887 0.5557168646224995 224.6944444444446 -Manufacturer#3 almond antique forest lavender goldenrod 14 275.14144189852607 275.14144189852607 [17,1,19,14] 75702.81305 -0.6720833036576083 -1296.9000000000003 -Manufacturer#3 almond antique metallic orange dim 19 260.23473614412046 260.23473614412046 [17,1,19,14,45] 67722.117896 -0.5703526513979519 -2129.0664 -Manufacturer#3 almond antique misty red olive 1 275.9139962356932 275.9139962356932 [1,19,14,45] 76128.53331875012 -0.577476899644802 -2547.7868749999993 -Manufacturer#3 almond antique olive coral navajo 45 260.5815918713796 260.5815918713796 [1,19,45] 67902.76602222225 -0.8710736366736884 -4099.731111111111 Manufacturer#4 almond antique gainsboro frosted violet 10 170.13011889596618 170.13011889596618 [39,27,10] 28944.25735555559 -0.6656975320098423 -1347.4777777777779 Manufacturer#4 almond antique violet mint lemon 39 242.26834609323197 242.26834609323197 [39,7,27,10] 58693.95151875002 -0.8051852719193339 -2537.328125 Manufacturer#4 almond aquamarine floral ivory bisque 27 234.10001662537326 234.10001662537326 [39,7,27,10,12] 54802.817784000035 -0.6046935574240581 -1719.8079999999995 Manufacturer#4 almond aquamarine yellow dodger mint 7 247.3342714197732 247.3342714197732 [39,7,27,12] 61174.24181875003 -0.5508665654707869 -1719.0368749999975 Manufacturer#4 almond azure aquamarine papaya violet 12 283.3344330566893 283.3344330566893 [7,27,12] 80278.40095555557 -0.7755740084632333 -1867.4888888888881 +Manufacturer#2 almond antique violet chocolate turquoise 14 142.2363169751898 142.2363169751898 [2,40,14] 20231.169866666663 -0.49369526554523185 -1113.7466666666658 +Manufacturer#2 almond antique violet turquoise frosted 40 137.76306498840682 137.76306498840682 [2,25,40,14] 18978.662075 -0.5205630897335946 -1004.4812499999995 +Manufacturer#2 almond aquamarine midnight light salmon 2 130.03972279269132 130.03972279269132 [2,18,25,40,14] 16910.329504000005 -0.46908967495720255 -766.1791999999995 +Manufacturer#2 almond aquamarine rose maroon antique 25 135.55100986344584 135.55100986344584 [2,18,25,40] 18374.07627499999 -0.6091405874714462 -1128.1787499999987 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 156.44019460768044 156.44019460768044 [2,18,25] 24473.534488888927 -0.9571686373491608 -1441.4466666666676 Manufacturer#5 almond antique blue firebrick mint 31 83.69879024746363 83.69879024746363 [2,6,31] 7005.487488888913 0.39004303087285047 418.9233333333353 Manufacturer#5 almond antique medium spring khaki 6 316.68049612345885 316.68049612345885 [2,6,46,31] 100286.53662500004 -0.713612911776183 -4090.853749999999 Manufacturer#5 almond antique sky peru orange 2 285.40506298242155 285.40506298242155 [2,23,6,46,31] 81456.04997600002 -0.712858514567818 -3297.2011999999986 @@ -948,27 +948,27 @@ window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0}] 90681.0 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 65831.5 2 +Manufacturer#3 almond antique metallic orange dim 19 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 90681.0 3 +Manufacturer#3 almond antique misty red olive 1 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 76690.0 4 +Manufacturer#3 almond antique olive coral navajo 45 [{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 112398.0 5 Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1753.76,"y":1.0}] 121152.0 1 Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 115872.0 2 Manufacturer#1 almond antique chartreuse lavender yellow 34 [{"x":1173.15,"y":2.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 110592.0 3 Manufacturer#1 almond antique salmon chartreuse burlywood 6 [{"x":1173.15,"y":1.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86428.0 4 Manufacturer#1 almond aquamarine burnished black steel 28 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86098.0 5 Manufacturer#1 almond aquamarine pink moccasin thistle 42 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0}] 86428.0 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 [{"x":1690.68,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 1 -Manufacturer#2 almond antique violet turquoise frosted 40 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 139825.5 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 3 -Manufacturer#2 almond aquamarine rose maroon antique 25 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 169347.0 4 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 5 -Manufacturer#3 almond antique chartreuse khaki white 17 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0}] 90681.0 1 -Manufacturer#3 almond antique forest lavender goldenrod 14 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 65831.5 2 -Manufacturer#3 almond antique metallic orange dim 19 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 90681.0 3 -Manufacturer#3 almond antique misty red olive 1 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 76690.0 4 -Manufacturer#3 almond antique olive coral navajo 45 [{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 112398.0 5 Manufacturer#4 almond antique gainsboro frosted violet 10 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0}] 48427.0 1 Manufacturer#4 almond antique violet mint lemon 39 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 46844.0 2 Manufacturer#4 almond aquamarine floral ivory bisque 27 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 45261.0 3 Manufacturer#4 almond aquamarine yellow dodger mint 7 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1844.92,"y":1.0}] 39309.0 4 Manufacturer#4 almond azure aquamarine papaya violet 12 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1844.92,"y":1.0}] 33357.0 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 [{"x":1690.68,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 1 +Manufacturer#2 almond antique violet turquoise frosted 40 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 139825.5 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 169347.0 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 5 Manufacturer#5 almond antique blue firebrick mint 31 [{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 155733.0 1 Manufacturer#5 almond antique medium spring khaki 6 [{"x":1018.1,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 99201.0 2 Manufacturer#5 almond antique sky peru orange 2 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 78486.0 3 @@ -1050,19 +1050,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@mfgr_price_view POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 Brand#31 1671.68 1671.68 +Manufacturer#3 Brand#32 3333.37 5005.05 +Manufacturer#3 Brand#34 1337.29 6342.34 +Manufacturer#3 Brand#35 1190.27 5860.93 Manufacturer#1 Brand#12 4800.84 4800.84 Manufacturer#1 Brand#14 2346.3 7147.14 Manufacturer#1 Brand#15 1602.59 8749.73 +Manufacturer#4 Brand#41 4755.94 4755.94 +Manufacturer#4 Brand#42 2581.68 7337.62 Manufacturer#2 Brand#22 3491.38 3491.38 Manufacturer#2 Brand#23 2031.98 5523.36 Manufacturer#2 Brand#24 1698.66 7222.02 Manufacturer#2 Brand#25 1701.6 5432.24 -Manufacturer#3 Brand#31 1671.68 1671.68 -Manufacturer#3 Brand#32 3333.37 5005.05 -Manufacturer#3 Brand#34 1337.29 6342.34 -Manufacturer#3 Brand#35 1190.27 5860.93 -Manufacturer#4 Brand#41 4755.94 4755.94 -Manufacturer#4 Brand#42 2581.68 7337.62 Manufacturer#5 Brand#51 1611.66 1611.66 Manufacturer#5 Brand#52 3254.17 4865.83 Manufacturer#5 Brand#53 2806.83 7672.66 @@ -1096,27 +1096,27 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@mfgr_brand_price_view POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 Brand#31 1671.68 +Manufacturer#3 Brand#35 2861.95 +Manufacturer#3 Brand#32 4272.34 +Manufacturer#3 Brand#32 4523.639999999999 +Manufacturer#3 Brand#34 4670.66 Manufacturer#1 Brand#14 1173.15 Manufacturer#1 Brand#14 2346.3 Manufacturer#1 Brand#12 4100.06 Manufacturer#1 Brand#15 4529.5 Manufacturer#1 Brand#12 4770.77 Manufacturer#1 Brand#12 4649.670000000001 -Manufacturer#2 Brand#22 1690.68 -Manufacturer#2 Brand#22 3491.38 -Manufacturer#2 Brand#23 5523.360000000001 -Manufacturer#2 Brand#24 5531.34 -Manufacturer#2 Brand#25 5432.240000000001 -Manufacturer#3 Brand#31 1671.68 -Manufacturer#3 Brand#35 2861.95 -Manufacturer#3 Brand#32 4272.34 -Manufacturer#3 Brand#32 4523.639999999999 -Manufacturer#3 Brand#34 4670.66 Manufacturer#4 Brand#41 1620.67 Manufacturer#4 Brand#42 2996.09 Manufacturer#4 Brand#42 4202.35 Manufacturer#4 Brand#41 4426.6 Manufacturer#4 Brand#41 4341.530000000001 +Manufacturer#2 Brand#22 1690.68 +Manufacturer#2 Brand#22 3491.38 +Manufacturer#2 Brand#23 5523.360000000001 +Manufacturer#2 Brand#24 5531.34 +Manufacturer#2 Brand#25 5432.240000000001 Manufacturer#5 Brand#52 1789.69 Manufacturer#5 Brand#51 3401.3500000000004 Manufacturer#5 Brand#53 5190.08 @@ -1140,6 +1140,21 @@ window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 precedi POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique misty red olive 2 1 2 +Manufacturer#3 almond antique misty red olive 3 1 3 +Manufacturer#3 almond antique forest lavender goldenrod 1 14 16 +Manufacturer#3 almond antique forest lavender goldenrod 2 14 29 +Manufacturer#3 almond antique forest lavender goldenrod 3 14 42 +Manufacturer#3 almond antique chartreuse khaki white 1 17 45 +Manufacturer#3 almond antique chartreuse khaki white 2 17 48 +Manufacturer#3 almond antique chartreuse khaki white 3 17 51 +Manufacturer#3 almond antique metallic orange dim 1 19 53 +Manufacturer#3 almond antique metallic orange dim 2 19 55 +Manufacturer#3 almond antique metallic orange dim 3 19 57 +Manufacturer#3 almond antique olive coral navajo 1 45 83 +Manufacturer#3 almond antique olive coral navajo 2 45 109 +Manufacturer#3 almond antique olive coral navajo 3 45 135 Manufacturer#1 almond antique burnished rose metallic 1 2 2 Manufacturer#1 almond antique burnished rose metallic 1 2 4 Manufacturer#1 almond antique burnished rose metallic 2 2 6 @@ -1158,36 +1173,6 @@ Manufacturer#1 almond antique chartreuse lavender yellow 3 34 102 Manufacturer#1 almond aquamarine pink moccasin thistle 1 42 110 Manufacturer#1 almond aquamarine pink moccasin thistle 2 42 118 Manufacturer#1 almond aquamarine pink moccasin thistle 3 42 126 -Manufacturer#2 almond aquamarine midnight light salmon 1 2 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 4 -Manufacturer#2 almond aquamarine midnight light salmon 3 2 6 -Manufacturer#2 almond antique violet chocolate turquoise 1 14 18 -Manufacturer#2 almond antique violet chocolate turquoise 2 14 30 -Manufacturer#2 almond antique violet chocolate turquoise 3 14 42 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 18 46 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 2 18 50 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 3 18 54 -Manufacturer#2 almond aquamarine rose maroon antique 1 25 61 -Manufacturer#2 almond aquamarine rose maroon antique 2 25 68 -Manufacturer#2 almond aquamarine rose maroon antique 3 25 75 -Manufacturer#2 almond antique violet turquoise frosted 1 40 90 -Manufacturer#2 almond antique violet turquoise frosted 2 40 105 -Manufacturer#2 almond antique violet turquoise frosted 3 40 120 -Manufacturer#3 almond antique misty red olive 1 1 1 -Manufacturer#3 almond antique misty red olive 2 1 2 -Manufacturer#3 almond antique misty red olive 3 1 3 -Manufacturer#3 almond antique forest lavender goldenrod 1 14 16 -Manufacturer#3 almond antique forest lavender goldenrod 2 14 29 -Manufacturer#3 almond antique forest lavender goldenrod 3 14 42 -Manufacturer#3 almond antique chartreuse khaki white 1 17 45 -Manufacturer#3 almond antique chartreuse khaki white 2 17 48 -Manufacturer#3 almond antique chartreuse khaki white 3 17 51 -Manufacturer#3 almond antique metallic orange dim 1 19 53 -Manufacturer#3 almond antique metallic orange dim 2 19 55 -Manufacturer#3 almond antique metallic orange dim 3 19 57 -Manufacturer#3 almond antique olive coral navajo 1 45 83 -Manufacturer#3 almond antique olive coral navajo 2 45 109 -Manufacturer#3 almond antique olive coral navajo 3 45 135 Manufacturer#4 almond aquamarine yellow dodger mint 1 7 7 Manufacturer#4 almond aquamarine yellow dodger mint 2 7 14 Manufacturer#4 almond aquamarine yellow dodger mint 3 7 21 @@ -1203,6 +1188,21 @@ Manufacturer#4 almond aquamarine floral ivory bisque 3 27 81 Manufacturer#4 almond antique violet mint lemon 1 39 93 Manufacturer#4 almond antique violet mint lemon 2 39 105 Manufacturer#4 almond antique violet mint lemon 3 39 117 +Manufacturer#2 almond aquamarine midnight light salmon 1 2 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 4 +Manufacturer#2 almond aquamarine midnight light salmon 3 2 6 +Manufacturer#2 almond antique violet chocolate turquoise 1 14 18 +Manufacturer#2 almond antique violet chocolate turquoise 2 14 30 +Manufacturer#2 almond antique violet chocolate turquoise 3 14 42 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 18 46 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 2 18 50 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 3 18 54 +Manufacturer#2 almond aquamarine rose maroon antique 1 25 61 +Manufacturer#2 almond aquamarine rose maroon antique 2 25 68 +Manufacturer#2 almond aquamarine rose maroon antique 3 25 75 +Manufacturer#2 almond antique violet turquoise frosted 1 40 90 +Manufacturer#2 almond antique violet turquoise frosted 2 40 105 +Manufacturer#2 almond antique violet turquoise frosted 3 40 120 Manufacturer#5 almond antique sky peru orange 1 2 2 Manufacturer#5 almond antique sky peru orange 2 2 4 Manufacturer#5 almond antique sky peru orange 3 2 6 @@ -1362,27 +1362,27 @@ POSTHOOK: query: select * from part_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@part_1 #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 -Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 -Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 -Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 @@ -1396,30 +1396,30 @@ POSTHOOK: query: select * from part_2 POSTHOOK: type: QUERY POSTHOOK: Input: default@part_2 #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0 31.0 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0 14.0 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 0 50.0 17 +Manufacturer#3 almond antique misty red olive 1 4 4 0 1.0 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 1 45.0 19 Manufacturer#1 almond antique burnished rose metallic 2 1 1 0 4.0 2 Manufacturer#1 almond antique burnished rose metallic 2 1 1 0 4.0 2 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0 34.0 2 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0 10.0 2 Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0 28.0 34 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1 42.0 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0 14.0 14 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0 40.0 14 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0 2.0 14 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0 25.0 40 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1 32.0 2 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0 31.0 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0 14.0 17 -Manufacturer#3 almond antique metallic orange dim 19 3 3 0 50.0 17 -Manufacturer#3 almond antique misty red olive 1 4 4 0 1.0 14 -Manufacturer#3 almond antique olive coral navajo 45 5 5 1 45.0 19 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0 17.0 10 Manufacturer#4 almond antique violet mint lemon 39 2 2 0 39.0 10 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0 27.0 10 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0 7.0 39 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1 29.0 27 -Manufacturer#5 almond antique blue firebrick mint 31 1 1 0 31.0 31 -Manufacturer#5 almond antique medium spring khaki 6 2 2 0 8.0 31 -Manufacturer#5 almond antique sky peru orange 2 3 3 0 2.0 31 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0 14.0 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0 40.0 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0 2.0 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0 25.0 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1 32.0 2 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 0 31.0 31 +Manufacturer#5 almond antique medium spring khaki 6 2 2 0 8.0 31 +Manufacturer#5 almond antique sky peru orange 2 3 3 0 2.0 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0 46.0 6 Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1 23.0 2 PREHOOK: query: select * from part_3 @@ -1430,27 +1430,27 @@ POSTHOOK: query: select * from part_3 POSTHOOK: type: QUERY POSTHOOK: Input: default@part_3 #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 17 +Manufacturer#3 almond antique misty red olive 1 4 4 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 19 Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 3 2 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 4 2 Manufacturer#1 almond aquamarine burnished black steel 28 5 5 34 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 6 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 -Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2 -Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17 -Manufacturer#3 almond antique metallic orange dim 19 3 3 17 -Manufacturer#3 almond antique misty red olive 1 4 4 14 -Manufacturer#3 almond antique olive coral navajo 45 5 5 19 Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10 Manufacturer#4 almond antique violet mint lemon 39 2 2 10 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 10 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 39 Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 27 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2 Manufacturer#5 almond antique blue firebrick mint 31 1 1 31 Manufacturer#5 almond antique medium spring khaki 6 2 2 31 Manufacturer#5 almond antique sky peru orange 2 3 3 31 @@ -1478,26 +1478,26 @@ having p_size > 0 POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 -Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 -Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 -Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 -Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 -Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 -Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 @@ -1521,27 +1521,27 @@ window w1 as (rows between 2 preceding and 2 following) POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 50 +Manufacturer#3 almond antique chartreuse khaki white 17 31 36 +Manufacturer#3 almond antique metallic orange dim 19 50 19 +Manufacturer#3 almond antique olive coral navajo 45 45 45 Manufacturer#1 almond antique burnished rose metallic 2 4 10 Manufacturer#1 almond antique burnished rose metallic 2 4 10 Manufacturer#1 almond antique salmon chartreuse burlywood 6 10 6 Manufacturer#1 almond aquamarine burnished black steel 28 28 62 Manufacturer#1 almond antique chartreuse lavender yellow 34 62 76 Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 2 -Manufacturer#2 almond antique violet chocolate turquoise 14 14 32 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 32 43 -Manufacturer#2 almond aquamarine rose maroon antique 25 43 25 -Manufacturer#2 almond antique violet turquoise frosted 40 40 40 -Manufacturer#3 almond antique misty red olive 1 1 1 -Manufacturer#3 almond antique forest lavender goldenrod 14 14 50 -Manufacturer#3 almond antique chartreuse khaki white 17 31 36 -Manufacturer#3 almond antique metallic orange dim 19 50 19 -Manufacturer#3 almond antique olive coral navajo 45 45 45 Manufacturer#4 almond aquamarine yellow dodger mint 7 7 29 Manufacturer#4 almond antique gainsboro frosted violet 10 17 22 Manufacturer#4 almond azure aquamarine papaya violet 12 29 12 Manufacturer#4 almond aquamarine floral ivory bisque 27 27 27 Manufacturer#4 almond antique violet mint lemon 39 39 39 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 2 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 32 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 32 43 +Manufacturer#2 almond aquamarine rose maroon antique 25 43 25 +Manufacturer#2 almond antique violet turquoise frosted 40 40 40 Manufacturer#5 almond antique sky peru orange 2 2 8 Manufacturer#5 almond antique medium spring khaki 6 8 6 Manufacturer#5 almond azure blanched chiffon midnight 23 23 54 @@ -1561,27 +1561,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 Manufacturer#1 almond antique burnished rose metallic 2 38 Manufacturer#1 almond antique burnished rose metallic 2 44 Manufacturer#1 almond antique chartreuse lavender yellow 34 72 Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 Manufacturer#1 almond aquamarine burnished black steel 28 110 Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 -Manufacturer#2 almond antique violet chocolate turquoise 14 56 -Manufacturer#2 almond antique violet turquoise frosted 40 81 -Manufacturer#2 almond aquamarine midnight light salmon 2 99 -Manufacturer#2 almond aquamarine rose maroon antique 25 85 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 -Manufacturer#3 almond antique chartreuse khaki white 17 50 -Manufacturer#3 almond antique forest lavender goldenrod 14 51 -Manufacturer#3 almond antique metallic orange dim 19 96 -Manufacturer#3 almond antique misty red olive 1 79 -Manufacturer#3 almond antique olive coral navajo 45 65 Manufacturer#4 almond antique gainsboro frosted violet 10 76 Manufacturer#4 almond antique violet mint lemon 39 83 Manufacturer#4 almond aquamarine floral ivory bisque 27 95 Manufacturer#4 almond aquamarine yellow dodger mint 7 85 Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 Manufacturer#5 almond antique blue firebrick mint 31 39 Manufacturer#5 almond antique medium spring khaki 6 85 Manufacturer#5 almond antique sky peru orange 2 108 @@ -1603,27 +1603,27 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 Manufacturer#1 almond antique burnished rose metallic 2 38 Manufacturer#1 almond antique burnished rose metallic 2 44 Manufacturer#1 almond antique chartreuse lavender yellow 34 72 Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 Manufacturer#1 almond aquamarine burnished black steel 28 110 Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 -Manufacturer#2 almond antique violet chocolate turquoise 14 56 -Manufacturer#2 almond antique violet turquoise frosted 40 81 -Manufacturer#2 almond aquamarine midnight light salmon 2 99 -Manufacturer#2 almond aquamarine rose maroon antique 25 85 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 -Manufacturer#3 almond antique chartreuse khaki white 17 50 -Manufacturer#3 almond antique forest lavender goldenrod 14 51 -Manufacturer#3 almond antique metallic orange dim 19 96 -Manufacturer#3 almond antique misty red olive 1 79 -Manufacturer#3 almond antique olive coral navajo 45 65 Manufacturer#4 almond antique gainsboro frosted violet 10 76 Manufacturer#4 almond antique violet mint lemon 39 83 Manufacturer#4 almond aquamarine floral ivory bisque 27 95 Manufacturer#4 almond aquamarine yellow dodger mint 7 85 Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 Manufacturer#5 almond antique blue firebrick mint 31 39 Manufacturer#5 almond antique medium spring khaki 6 85 Manufacturer#5 almond antique sky peru orange 2 108 @@ -1649,27 +1649,27 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 50 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 31 +Manufacturer#3 almond antique metallic orange dim 19 96 50 +Manufacturer#3 almond antique misty red olive 1 79 51 +Manufacturer#3 almond antique olive coral navajo 45 65 96 Manufacturer#1 almond antique burnished rose metallic 2 38 4 Manufacturer#1 almond antique burnished rose metallic 2 44 4 Manufacturer#1 almond antique chartreuse lavender yellow 34 72 38 Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 44 Manufacturer#1 almond aquamarine burnished black steel 28 110 72 Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 114 -Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 -Manufacturer#2 almond antique violet turquoise frosted 40 81 54 -Manufacturer#2 almond aquamarine midnight light salmon 2 99 56 -Manufacturer#2 almond aquamarine rose maroon antique 25 85 81 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 99 -Manufacturer#3 almond antique chartreuse khaki white 17 50 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 51 31 -Manufacturer#3 almond antique metallic orange dim 19 96 50 -Manufacturer#3 almond antique misty red olive 1 79 51 -Manufacturer#3 almond antique olive coral navajo 45 65 96 Manufacturer#4 almond antique gainsboro frosted violet 10 76 10 Manufacturer#4 almond antique violet mint lemon 39 83 49 Manufacturer#4 almond aquamarine floral ivory bisque 27 95 76 Manufacturer#4 almond aquamarine yellow dodger mint 7 85 83 Manufacturer#4 almond azure aquamarine papaya violet 12 46 95 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 +Manufacturer#2 almond antique violet turquoise frosted 40 81 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 99 Manufacturer#5 almond antique blue firebrick mint 31 39 31 Manufacturer#5 almond antique medium spring khaki 6 85 37 Manufacturer#5 almond antique sky peru orange 2 108 39 @@ -1695,27 +1695,27 @@ window w1 as (partition by p_mfgr order by p_name range between 2 preceding and POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 14 +Manufacturer#3 almond antique metallic orange dim 19 19 19 +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique olive coral navajo 45 45 45 Manufacturer#1 almond antique burnished rose metallic 2 4 4 Manufacturer#1 almond antique burnished rose metallic 2 4 4 Manufacturer#1 almond antique chartreuse lavender yellow 34 34 34 Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 6 Manufacturer#1 almond aquamarine burnished black steel 28 28 28 Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 42 -Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 -Manufacturer#2 almond antique violet turquoise frosted 40 40 40 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 2 -Manufacturer#2 almond aquamarine rose maroon antique 25 25 25 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 18 -Manufacturer#3 almond antique chartreuse khaki white 17 17 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 14 14 -Manufacturer#3 almond antique metallic orange dim 19 19 19 -Manufacturer#3 almond antique misty red olive 1 1 1 -Manufacturer#3 almond antique olive coral navajo 45 45 45 Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 Manufacturer#4 almond antique violet mint lemon 39 39 39 Manufacturer#4 almond aquamarine floral ivory bisque 27 27 27 Manufacturer#4 almond aquamarine yellow dodger mint 7 7 7 Manufacturer#4 almond azure aquamarine papaya violet 12 12 12 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 40 40 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 2 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 18 Manufacturer#5 almond antique blue firebrick mint 31 31 31 Manufacturer#5 almond antique medium spring khaki 6 6 6 Manufacturer#5 almond antique sky peru orange 2 2 2 @@ -1741,27 +1741,27 @@ window w1 as (partition by p_mfgr order by p_name range between 2 preceding and POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 +Manufacturer#3 almond antique metallic orange dim 19 19 50 +Manufacturer#3 almond antique misty red olive 1 1 51 +Manufacturer#3 almond antique olive coral navajo 45 45 96 Manufacturer#1 almond antique burnished rose metallic 2 4 2 Manufacturer#1 almond antique burnished rose metallic 2 4 4 Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 Manufacturer#1 almond aquamarine burnished black steel 28 28 72 Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 -Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 -Manufacturer#2 almond antique violet turquoise frosted 40 40 54 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 -Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 -Manufacturer#3 almond antique chartreuse khaki white 17 17 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 -Manufacturer#3 almond antique metallic orange dim 19 19 50 -Manufacturer#3 almond antique misty red olive 1 1 51 -Manufacturer#3 almond antique olive coral navajo 45 45 96 Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 Manufacturer#4 almond antique violet mint lemon 39 39 49 Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 Manufacturer#5 almond antique blue firebrick mint 31 31 31 Manufacturer#5 almond antique medium spring khaki 6 6 37 Manufacturer#5 almond antique sky peru orange 2 2 39 @@ -1791,27 +1791,27 @@ window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 31 +Manufacturer#3 almond antique metallic orange dim 19 19 50 50 +Manufacturer#3 almond antique misty red olive 1 1 51 51 +Manufacturer#3 almond antique olive coral navajo 45 45 96 96 Manufacturer#1 almond antique burnished rose metallic 2 4 4 4 Manufacturer#1 almond antique burnished rose metallic 2 4 4 4 Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 38 Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 44 Manufacturer#1 almond aquamarine burnished black steel 28 28 72 72 Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 114 -Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 14 -Manufacturer#2 almond antique violet turquoise frosted 40 40 54 54 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 56 -Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 81 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 99 -Manufacturer#3 almond antique chartreuse khaki white 17 17 17 17 -Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 31 -Manufacturer#3 almond antique metallic orange dim 19 19 50 50 -Manufacturer#3 almond antique misty red olive 1 1 51 51 -Manufacturer#3 almond antique olive coral navajo 45 45 96 96 Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 10 Manufacturer#4 almond antique violet mint lemon 39 39 49 49 Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 76 Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 83 Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 95 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 99 Manufacturer#5 almond antique blue firebrick mint 31 31 31 31 Manufacturer#5 almond antique medium spring khaki 6 6 37 37 Manufacturer#5 almond antique sky peru orange 2 2 39 39 @@ -1841,27 +1841,27 @@ window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 51 +Manufacturer#3 almond antique metallic orange dim 19 19 50 96 +Manufacturer#3 almond antique misty red olive 1 1 51 79 +Manufacturer#3 almond antique olive coral navajo 45 45 96 65 Manufacturer#1 almond antique burnished rose metallic 2 4 4 38 Manufacturer#1 almond antique burnished rose metallic 2 4 4 44 Manufacturer#1 almond antique chartreuse lavender yellow 34 34 38 72 Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 44 112 Manufacturer#1 almond aquamarine burnished black steel 28 28 72 110 Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 114 76 -Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 56 -Manufacturer#2 almond antique violet turquoise frosted 40 40 54 81 -Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 99 -Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 85 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 45 -Manufacturer#3 almond antique chartreuse khaki white 17 17 17 50 -Manufacturer#3 almond antique forest lavender goldenrod 14 14 31 51 -Manufacturer#3 almond antique metallic orange dim 19 19 50 96 -Manufacturer#3 almond antique misty red olive 1 1 51 79 -Manufacturer#3 almond antique olive coral navajo 45 45 96 65 Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 76 Manufacturer#4 almond antique violet mint lemon 39 39 49 83 Manufacturer#4 almond aquamarine floral ivory bisque 27 27 76 95 Manufacturer#4 almond aquamarine yellow dodger mint 7 7 83 85 Manufacturer#4 almond azure aquamarine papaya violet 12 12 95 46 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 40 54 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 56 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 81 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 99 45 Manufacturer#5 almond antique blue firebrick mint 31 31 31 39 Manufacturer#5 almond antique medium spring khaki 6 6 37 85 Manufacturer#5 almond antique sky peru orange 2 2 39 108 @@ -1883,26 +1883,26 @@ window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 42 -Manufacturer#1 almond antique chartreuse lavender yellow 34 70 -Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 -Manufacturer#1 almond aquamarine burnished black steel 28 110 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 -Manufacturer#2 almond antique violet chocolate turquoise 14 56 -Manufacturer#2 almond antique violet turquoise frosted 40 81 -Manufacturer#2 almond aquamarine midnight light salmon 2 99 -Manufacturer#2 almond aquamarine rose maroon antique 25 85 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 Manufacturer#3 almond antique chartreuse khaki white 17 50 Manufacturer#3 almond antique forest lavender goldenrod 14 51 Manufacturer#3 almond antique metallic orange dim 19 96 Manufacturer#3 almond antique misty red olive 1 79 Manufacturer#3 almond antique olive coral navajo 45 65 +Manufacturer#1 almond antique burnished rose metallic 2 42 +Manufacturer#1 almond antique chartreuse lavender yellow 34 70 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 +Manufacturer#1 almond aquamarine burnished black steel 28 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 Manufacturer#4 almond antique gainsboro frosted violet 10 76 Manufacturer#4 almond antique violet mint lemon 39 83 Manufacturer#4 almond aquamarine floral ivory bisque 27 95 Manufacturer#4 almond aquamarine yellow dodger mint 7 85 Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 Manufacturer#5 almond antique blue firebrick mint 31 39 Manufacturer#5 almond antique medium spring khaki 6 85 Manufacturer#5 almond antique sky peru orange 2 108 @@ -1922,27 +1922,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 Manufacturer#1 almond antique burnished rose metallic 2 1 Manufacturer#1 almond antique burnished rose metallic 2 1 Manufacturer#1 almond antique chartreuse lavender yellow 34 3 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 Manufacturer#1 almond aquamarine burnished black steel 28 5 Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 -Manufacturer#2 almond antique violet chocolate turquoise 14 1 -Manufacturer#2 almond antique violet turquoise frosted 40 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 3 -Manufacturer#2 almond aquamarine rose maroon antique 25 4 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 -Manufacturer#3 almond antique chartreuse khaki white 17 1 -Manufacturer#3 almond antique forest lavender goldenrod 14 2 -Manufacturer#3 almond antique metallic orange dim 19 3 -Manufacturer#3 almond antique misty red olive 1 4 -Manufacturer#3 almond antique olive coral navajo 45 5 Manufacturer#4 almond antique gainsboro frosted violet 10 1 Manufacturer#4 almond antique violet mint lemon 39 2 Manufacturer#4 almond aquamarine floral ivory bisque 27 3 Manufacturer#4 almond aquamarine yellow dodger mint 7 4 Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 Manufacturer#5 almond antique blue firebrick mint 31 1 Manufacturer#5 almond antique medium spring khaki 6 2 Manufacturer#5 almond antique sky peru orange 2 3 @@ -1970,27 +1970,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 -Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 -Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 -Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 -Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 -Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 -Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 -Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 -Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 -Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 -Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 @@ -2017,29 +2017,29 @@ POSTHOOK: Input: default@part Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1173.15 1173.15 Manufacturer#1 almond antique burnished rose metallic 2 2346.3 1173.15 1173.15 Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 1753.76 1753.76 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 1698.66 1698.66 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 1337.29 1337.29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 1206.26 1206.26 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 1290.35 1290.35 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 1018.1 1018.1 Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 1602.59 1602.59 Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 1414.42 1414.42 -Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 1632.66 1632.66 -Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1690.68 1690.68 Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 1800.7 1800.7 Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 2031.98 2031.98 -Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 1698.66 1698.66 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 1190.27 1190.27 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 1844.92 1844.92 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 1611.66 1611.66 +Manufacturer#5 almond antique sky peru orange 2 1788.73 1788.73 1788.73 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 1464.48 1464.48 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 1632.66 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1690.68 1690.68 Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 1701.6 1701.6 Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1671.68 1671.68 -Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 1190.27 1190.27 Manufacturer#3 almond antique metallic orange dim 19 1410.39 1410.39 1410.39 Manufacturer#3 almond antique misty red olive 1 1922.98 1922.98 1922.98 -Manufacturer#3 almond antique olive coral navajo 45 1337.29 1337.29 1337.29 Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1620.67 1620.67 Manufacturer#4 almond antique violet mint lemon 39 1375.42 1375.42 1375.42 -Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 1206.26 1206.26 -Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 1844.92 1844.92 -Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 1290.35 1290.35 Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1789.69 1789.69 -Manufacturer#5 almond antique medium spring khaki 6 1611.66 1611.66 1611.66 -Manufacturer#5 almond antique sky peru orange 2 1788.73 1788.73 1788.73 -Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 1018.1 1018.1 -Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 1464.48 1464.48 PREHOOK: query: -- 39. testUDFOnOrderCols select p_mfgr, p_type, substr(p_type, 2) as short_ptype, rank() over (partition by p_mfgr order by substr(p_type, 2)) as r @@ -2054,27 +2054,27 @@ from part POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 ECONOMY PLATED COPPER CONOMY PLATED COPPER 1 +Manufacturer#3 MEDIUM BURNISHED BRASS EDIUM BURNISHED BRASS 2 +Manufacturer#3 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 +Manufacturer#3 PROMO ANODIZED TIN ROMO ANODIZED TIN 4 +Manufacturer#3 STANDARD POLISHED STEEL TANDARD POLISHED STEEL 5 Manufacturer#1 LARGE BRUSHED STEEL ARGE BRUSHED STEEL 1 Manufacturer#1 LARGE BURNISHED STEEL ARGE BURNISHED STEEL 2 Manufacturer#1 PROMO BURNISHED NICKEL ROMO BURNISHED NICKEL 3 Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 Manufacturer#1 STANDARD ANODIZED STEEL TANDARD ANODIZED STEEL 6 -Manufacturer#2 ECONOMY POLISHED STEEL CONOMY POLISHED STEEL 1 -Manufacturer#2 MEDIUM ANODIZED COPPER EDIUM ANODIZED COPPER 2 -Manufacturer#2 MEDIUM BURNISHED COPPER EDIUM BURNISHED COPPER 3 -Manufacturer#2 SMALL POLISHED NICKEL MALL POLISHED NICKEL 4 -Manufacturer#2 STANDARD PLATED TIN TANDARD PLATED TIN 5 -Manufacturer#3 ECONOMY PLATED COPPER CONOMY PLATED COPPER 1 -Manufacturer#3 MEDIUM BURNISHED BRASS EDIUM BURNISHED BRASS 2 -Manufacturer#3 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 -Manufacturer#3 PROMO ANODIZED TIN ROMO ANODIZED TIN 4 -Manufacturer#3 STANDARD POLISHED STEEL TANDARD POLISHED STEEL 5 Manufacturer#4 ECONOMY BRUSHED COPPER CONOMY BRUSHED COPPER 1 Manufacturer#4 SMALL BRUSHED BRASS MALL BRUSHED BRASS 2 Manufacturer#4 SMALL PLATED STEEL MALL PLATED STEEL 3 Manufacturer#4 PROMO POLISHED STEEL ROMO POLISHED STEEL 4 Manufacturer#4 STANDARD ANODIZED TIN TANDARD ANODIZED TIN 5 +Manufacturer#2 ECONOMY POLISHED STEEL CONOMY POLISHED STEEL 1 +Manufacturer#2 MEDIUM ANODIZED COPPER EDIUM ANODIZED COPPER 2 +Manufacturer#2 MEDIUM BURNISHED COPPER EDIUM BURNISHED COPPER 3 +Manufacturer#2 SMALL POLISHED NICKEL MALL POLISHED NICKEL 4 +Manufacturer#2 STANDARD PLATED TIN TANDARD PLATED TIN 5 Manufacturer#5 LARGE BRUSHED BRASS ARGE BRUSHED BRASS 1 Manufacturer#5 ECONOMY BURNISHED STEEL CONOMY BURNISHED STEEL 2 Manufacturer#5 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 @@ -2094,27 +2094,27 @@ select p_mfgr, p_name, p_size, POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 4272.34 +Manufacturer#3 almond antique misty red olive 1 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 7532.61 Manufacturer#1 almond antique burnished rose metallic 2 1173.15 Manufacturer#1 almond antique burnished rose metallic 2 2346.3 Manufacturer#1 almond antique chartreuse lavender yellow 34 4100.06 Manufacturer#1 almond antique salmon chartreuse burlywood 6 5702.650000000001 Manufacturer#1 almond aquamarine burnished black steel 28 7117.070000000001 Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.730000000001 -Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 -Manufacturer#2 almond antique violet turquoise frosted 40 3491.38 -Manufacturer#2 almond aquamarine midnight light salmon 2 5523.360000000001 -Manufacturer#2 almond aquamarine rose maroon antique 25 7222.02 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 8923.62 -Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 -Manufacturer#3 almond antique forest lavender goldenrod 14 2861.95 -Manufacturer#3 almond antique metallic orange dim 19 4272.34 -Manufacturer#3 almond antique misty red olive 1 6195.32 -Manufacturer#3 almond antique olive coral navajo 45 7532.61 Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 Manufacturer#4 almond antique violet mint lemon 39 2996.09 Manufacturer#4 almond aquamarine floral ivory bisque 27 4202.35 Manufacturer#4 almond aquamarine yellow dodger mint 7 6047.27 Manufacturer#4 almond azure aquamarine papaya violet 12 7337.620000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 8923.62 Manufacturer#5 almond antique blue firebrick mint 31 1789.69 Manufacturer#5 almond antique medium spring khaki 6 3401.3500000000004 Manufacturer#5 almond antique sky peru orange 2 5190.08 @@ -2134,27 +2134,27 @@ select p_mfgr, p_name, p_size, POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique misty red olive 1 1922.98 +Manufacturer#3 almond antique forest lavender goldenrod 14 3113.25 +Manufacturer#3 almond antique chartreuse khaki white 17 4784.93 +Manufacturer#3 almond antique metallic orange dim 19 6195.320000000001 +Manufacturer#3 almond antique olive coral navajo 45 7532.610000000001 Manufacturer#1 almond antique burnished rose metallic 2 2346.3 Manufacturer#1 almond antique burnished rose metallic 2 2346.3 Manufacturer#1 almond antique salmon chartreuse burlywood 6 3948.8900000000003 Manufacturer#1 almond aquamarine burnished black steel 28 5363.31 Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.070000000001 Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.730000000001 -Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 -Manufacturer#2 almond antique violet chocolate turquoise 14 3722.66 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5424.26 -Manufacturer#2 almond aquamarine rose maroon antique 25 7122.92 -Manufacturer#2 almond antique violet turquoise frosted 40 8923.62 -Manufacturer#3 almond antique misty red olive 1 1922.98 -Manufacturer#3 almond antique forest lavender goldenrod 14 3113.25 -Manufacturer#3 almond antique chartreuse khaki white 17 4784.93 -Manufacturer#3 almond antique metallic orange dim 19 6195.320000000001 -Manufacturer#3 almond antique olive coral navajo 45 7532.610000000001 Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 Manufacturer#4 almond antique gainsboro frosted violet 10 3465.59 Manufacturer#4 almond azure aquamarine papaya violet 12 4755.9400000000005 Manufacturer#4 almond aquamarine floral ivory bisque 27 5962.200000000001 Manufacturer#4 almond antique violet mint lemon 39 7337.620000000001 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 +Manufacturer#2 almond antique violet chocolate turquoise 14 3722.66 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5424.26 +Manufacturer#2 almond aquamarine rose maroon antique 25 7122.92 +Manufacturer#2 almond antique violet turquoise frosted 40 8923.62 Manufacturer#5 almond antique sky peru orange 2 1788.73 Manufacturer#5 almond antique medium spring khaki 6 3400.3900000000003 Manufacturer#5 almond azure blanched chiffon midnight 23 4864.870000000001 @@ -2174,27 +2174,27 @@ select p_mfgr, p_name, p_size, POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique chartreuse khaki white 17 7532.61 +Manufacturer#3 almond antique forest lavender goldenrod 14 5860.929999999999 +Manufacturer#3 almond antique metallic orange dim 19 4670.66 +Manufacturer#3 almond antique misty red olive 1 3260.27 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 Manufacturer#1 almond antique burnished rose metallic 2 8749.730000000001 Manufacturer#1 almond antique burnished rose metallic 2 7576.58 Manufacturer#1 almond antique chartreuse lavender yellow 34 6403.43 Manufacturer#1 almond antique salmon chartreuse burlywood 6 4649.67 Manufacturer#1 almond aquamarine burnished black steel 28 3047.08 Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 -Manufacturer#2 almond antique violet chocolate turquoise 14 8923.62 -Manufacturer#2 almond antique violet turquoise frosted 40 7232.9400000000005 -Manufacturer#2 almond aquamarine midnight light salmon 2 5432.24 -Manufacturer#2 almond aquamarine rose maroon antique 25 3400.26 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 -Manufacturer#3 almond antique chartreuse khaki white 17 7532.61 -Manufacturer#3 almond antique forest lavender goldenrod 14 5860.929999999999 -Manufacturer#3 almond antique metallic orange dim 19 4670.66 -Manufacturer#3 almond antique misty red olive 1 3260.27 -Manufacturer#3 almond antique olive coral navajo 45 1337.29 Manufacturer#4 almond antique gainsboro frosted violet 10 7337.620000000001 Manufacturer#4 almond antique violet mint lemon 39 5716.950000000001 Manufacturer#4 almond aquamarine floral ivory bisque 27 4341.530000000001 Manufacturer#4 almond aquamarine yellow dodger mint 7 3135.27 Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 +Manufacturer#2 almond antique violet chocolate turquoise 14 8923.62 +Manufacturer#2 almond antique violet turquoise frosted 40 7232.9400000000005 +Manufacturer#2 almond aquamarine midnight light salmon 2 5432.24 +Manufacturer#2 almond aquamarine rose maroon antique 25 3400.26 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 Manufacturer#5 almond antique blue firebrick mint 31 7672.66 Manufacturer#5 almond antique medium spring khaki 6 5882.970000000001 Manufacturer#5 almond antique sky peru orange 2 4271.3099999999995 @@ -2214,27 +2214,27 @@ select p_mfgr, p_name, p_size, POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### +Manufacturer#3 almond antique misty red olive 1 7532.610000000001 +Manufacturer#3 almond antique forest lavender goldenrod 14 5609.63 +Manufacturer#3 almond antique chartreuse khaki white 17 4419.360000000001 +Manufacturer#3 almond antique metallic orange dim 19 2747.6800000000003 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 Manufacturer#1 almond antique burnished rose metallic 2 8749.730000000001 Manufacturer#1 almond antique burnished rose metallic 2 8749.730000000001 Manufacturer#1 almond antique salmon chartreuse burlywood 6 6403.43 Manufacturer#1 almond aquamarine burnished black steel 28 4800.84 Manufacturer#1 almond antique chartreuse lavender yellow 34 3386.42 Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 -Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 -Manufacturer#2 almond antique violet chocolate turquoise 14 6891.639999999999 -Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5200.96 -Manufacturer#2 almond aquamarine rose maroon antique 25 3499.36 -Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 -Manufacturer#3 almond antique misty red olive 1 7532.610000000001 -Manufacturer#3 almond antique forest lavender goldenrod 14 5609.63 -Manufacturer#3 almond antique chartreuse khaki white 17 4419.360000000001 -Manufacturer#3 almond antique metallic orange dim 19 2747.6800000000003 -Manufacturer#3 almond antique olive coral navajo 45 1337.29 Manufacturer#4 almond aquamarine yellow dodger mint 7 7337.620000000001 Manufacturer#4 almond antique gainsboro frosted violet 10 5492.7 Manufacturer#4 almond azure aquamarine papaya violet 12 3872.0299999999997 Manufacturer#4 almond aquamarine floral ivory bisque 27 2581.6800000000003 Manufacturer#4 almond antique violet mint lemon 39 1375.42 +Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 +Manufacturer#2 almond antique violet chocolate turquoise 14 6891.639999999999 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5200.96 +Manufacturer#2 almond aquamarine rose maroon antique 25 3499.36 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 Manufacturer#5 almond antique sky peru orange 2 7672.660000000002 Manufacturer#5 almond antique medium spring khaki 6 5883.93 Manufacturer#5 almond azure blanched chiffon midnight 23 4272.27 @@ -2314,12 +2314,12 @@ where p_mfgr='Manufacturer#1' POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### -1173.15 1458.2883333333336 8749.730000000001 -1173.15 1515.3160000000003 7576.580000000002 -1753.76 1600.8575000000003 6403.430000000001 -1602.59 1549.8900000000003 4649.670000000001 -1414.42 1523.5400000000004 3047.080000000001 -1632.66 1632.6600000000008 1632.6600000000008 +1173.15 1366.6866666666667 4100.06 +1173.15 1366.6866666666667 4100.06 +1753.76 1366.6866666666667 4100.06 +1602.59 1508.505 3017.01 +1414.42 1508.505 3017.01 +1632.66 1632.66 1632.66 PREHOOK: query: -- 47. empty partition select sum(p_size) over (partition by p_mfgr ) from part where p_mfgr = 'm1'