diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 2bf64dcfbd..bccf231ed9 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -933,7 +933,6 @@ spark.query.files=add_part_multiple.q, \ auto_join_without_localtask.q, \ auto_smb_mapjoin_14.q, \ auto_sortmerge_join_1.q, \ - auto_sortmerge_join_10.q, \ auto_sortmerge_join_12.q, \ auto_sortmerge_join_13.q, \ auto_sortmerge_join_14.q, \ diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out deleted file mode 100644 index 9c6bd7b3e1..0000000000 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out +++ /dev/null @@ -1,300 +0,0 @@ -PREHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl1 -POSTHOOK: query: CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl1 -PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl2 -POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl2 -PREHOOK: query: insert overwrite table tbl1 -select * from src where key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tbl1 -POSTHOOK: query: insert overwrite table tbl1 -select * from src where key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tbl1 -POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table tbl2 -select * from src where key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@tbl2 -POSTHOOK: query: insert overwrite table tbl2 -select * from src where key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@tbl2 -POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select count(*) from - ( - select * from - (select a.key as key, a.value as value from tbl1 a where key < 6 - union all - select a.key as key, a.value as value from tbl1 a where key < 6 - ) usubq1 ) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(*) from - ( - select * from - (select a.key as key, a.value as value from tbl1 a where key < 6 - union all - select a.key as key, a.value as value from tbl1 a where key < 6 - ) usubq1 ) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from - ( - select * from - (select a.key as key, a.value as value from tbl1 a where key < 6 - union all - select a.key as key, a.value as value from tbl1 a where key < 6 - ) usubq1 ) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl1 -PREHOOK: Input: default@tbl2 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from - ( - select * from - (select a.key as key, a.value as value from tbl1 a where key < 6 - union all - select a.key as key, a.value as value from tbl1 a where key < 6 - ) usubq1 ) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl1 -POSTHOOK: Input: default@tbl2 -#### A masked pattern was here #### -40 -PREHOOK: query: explain -select count(*) from - (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -PREHOOK: type: QUERY -POSTHOOK: query: explain -select count(*) from - (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-2 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: int) - mode: final - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from - (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl1 -PREHOOK: Input: default@tbl2 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from - (select a.key as key, count(*) as value from tbl1 a where key < 6 group by a.key) subq1 - join - (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 - on subq1.key = subq2.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl1 -POSTHOOK: Input: default@tbl2 -#### A masked pattern was here #### -8 diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out index c9b03a1283..192689a737 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out @@ -67,14 +67,15 @@ ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' and (a.key = 0 or a.key = 5) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -89,40 +90,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) @@ -134,9 +108,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -248,14 +220,15 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -270,40 +243,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -315,9 +261,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -435,14 +379,15 @@ ON a.key = b.key WHERE a.key = 0 or a.key = 5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -457,40 +402,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Local Work: - Map Reduce Local Work - - Stage: Stage-1 - Spark - Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 28 Data size: 245 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 245 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 1 Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) @@ -502,9 +420,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)