diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 4a2daab..cac1204 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -620,6 +620,8 @@ spark.query.files=add_part_multiple.q \ vectorization_part_project.q \ vectorized_timestamp_funcs.q \ vectorized_ptf.q \ + vectorized_mapjoin.q \ + vectorized_nested_mapjoin.q \ windowing.q \ subquery_exists.q \ subquery_in.q \ diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out new file mode 100644 index 0000000..979a777 --- /dev/null +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -0,0 +1,96 @@ +PREHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Map 4 (GROUP PARTITION-LEVEL SORT) + Reducer 3 <- Reducer 2 (GROUP) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t2 + Filter Operator + predicate: cint is not null (type: boolean) + Reduce Output Operator + key expressions: cint (type: int) + sort order: + + Map-reduce partition columns: cint (type: int) + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Filter Operator + predicate: cint is not null (type: boolean) + Reduce Output Operator + key expressions: cint (type: int) + sort order: + + Map-reduce partition columns: cint (type: int) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col2, _col17 + Select Operator + expressions: _col2 (type: int), _col17 (type: int) + outputColumnNames: _col2, _col17 + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) + FROM alltypesorc t1 + JOIN alltypesorc t2 ON t1.cint = t2.cint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +3152013 1073680599 -1073279343 9.375396162525452E8 diff --git ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out new file mode 100644 index 0000000..422179e --- /dev/null +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -0,0 +1,121 @@ +PREHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +PREHOOK: type: QUERY +POSTHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT), Reducer 5 (GROUP PARTITION-LEVEL SORT) + Reducer 3 <- Reducer 2 (GROUP) + Reducer 5 <- Map 4 (GROUP PARTITION-LEVEL SORT), Map 6 (GROUP PARTITION-LEVEL SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: v3 + Filter Operator + predicate: csmallint is not null (type: boolean) + Reduce Output Operator + key expressions: csmallint (type: smallint) + sort order: + + Map-reduce partition columns: csmallint (type: smallint) + Execution mode: vectorized + Map 4 + Map Operator Tree: + TableScan + alias: v1 + Filter Operator + predicate: (ctinyint is not null and csmallint is not null) (type: boolean) + Reduce Output Operator + key expressions: ctinyint (type: tinyint) + sort order: + + Map-reduce partition columns: ctinyint (type: tinyint) + value expressions: csmallint (type: smallint), cdouble (type: double) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: v2 + Filter Operator + predicate: ctinyint is not null (type: boolean) + Reduce Output Operator + key expressions: ctinyint (type: tinyint) + sort order: + + Map-reduce partition columns: ctinyint (type: tinyint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col1 + Select Operator + expressions: _col1 (type: double) + outputColumnNames: _col1 + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col15 + Filter Operator + predicate: (_col0 = _col15) (type: boolean) + Select Operator + expressions: _col1 (type: smallint), _col5 (type: double) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + value expressions: _col1 (type: double) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +6.065190932485957E11