diff --git a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index 61563e4..6b0498b 100644 --- a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -34,7 +34,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 31) - Reducer 8 <- Map 5 (PARTITION-LEVEL SORT, 31) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 31), Reducer 6 (PARTITION-LEVEL SORT, 31), Reducer 8 (PARTITION-LEVEL SORT, 31) Reducer 4 <- Reducer 3 (GROUP, 31) #### A masked pattern was here #### @@ -66,6 +66,20 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Select Operator @@ -188,7 +202,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 3 <- Map 5 (GROUP, 31) #### A masked pattern was here #### Vertices: Map 4 @@ -212,6 +226,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: Group By Operator @@ -349,8 +384,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) - Reducer 6 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -369,6 +404,38 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Map 3 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Map 5 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -496,7 +563,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 1) Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 31), Map 7 (PARTITION-LEVEL SORT, 31), Reducer 2 (PARTITION-LEVEL SORT, 31), Reducer 6 (PARTITION-LEVEL SORT, 31) - Reducer 6 <- Map 1 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -533,6 +600,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 167 Data size: 2035 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Map 7 Map Operator Tree: TableScan @@ -713,11 +796,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 1 (PARTITION-LEVEL SORT, 31), Map 16 (PARTITION-LEVEL SORT, 31) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 31), Map 16 (PARTITION-LEVEL SORT, 31) Reducer 13 <- Map 17 (PARTITION-LEVEL SORT, 31), Reducer 12 (PARTITION-LEVEL SORT, 31) Reducer 14 <- Map 18 (PARTITION-LEVEL SORT, 31), Reducer 13 (PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31), Map 8 (PARTITION-LEVEL SORT, 31) + Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 31), Reducer 2 (PARTITION-LEVEL SORT, 31) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 31), Reducer 3 (PARTITION-LEVEL SORT, 31) Reducer 15 <- Reducer 14 (GROUP, 31) - Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 31), Reducer 13 (PARTITION-LEVEL SORT, 31) Reducer 5 <- Reducer 4 (GROUP, 31) Reducer 6 <- Reducer 15 (PARTITION-LEVEL SORT, 31), Reducer 5 (PARTITION-LEVEL SORT, 31) Reducer 7 <- Reducer 6 (SORT, 1) @@ -750,6 +835,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: d_date_sk (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map 11 + Map Operator Tree: + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((inv_item_sk is not null and inv_warehouse_sk is not null) and inv_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: inv_item_sk (type: int) + sort order: + + Map-reduce partition columns: inv_item_sk (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: inv_date_sk (type: int), inv_quantity_on_hand (type: int), inv_warehouse_sk (type: int) Map 16 Map Operator Tree: TableScan @@ -790,6 +889,33 @@ STAGE PLANS: sort order: + Map-reduce partition columns: d_date_sk (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: i_item_sk (type: int) + sort order: + + Map-reduce partition columns: i_item_sk (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: warehouse + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: w_warehouse_sk is not null (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: w_warehouse_sk (type: int) + sort order: + + Map-reduce partition columns: w_warehouse_sk (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: w_warehouse_name (type: string) Reducer 12 Reduce Operator Tree: Join Operator @@ -859,19 +985,55 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (CASE (_col5) WHEN (0) THEN (0) ELSE ((_col4 / _col5)) END > 1) (type: boolean) + Select Operator + expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: double), _col5 (type: double) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: double), CASE (_col5) WHEN (0) THEN (null) ELSE ((_col4 / _col5)) END (type: double) - outputColumnNames: _col1, _col2, _col3, _col5, _col6 + Filter Operator + predicate: (CASE (_col5) WHEN (0) THEN (0) ELSE ((_col4 / _col5)) END > 1) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Select Operator + expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: double), CASE (_col5) WHEN (0) THEN (null) ELSE ((_col4 / _col5)) END (type: double) + outputColumnNames: _col1, _col2, _col3, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: double), _col6 (type: double) + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: double), _col6 (type: double) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 inv_item_sk (type: int) + 1 i_item_sk (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col7 (type: int) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 w_warehouse_sk (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col7, _col11, _col12 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col7 (type: int), _col11 (type: int), _col12 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -909,19 +1071,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (CASE (_col5) WHEN (0) THEN (0) ELSE ((_col4 / _col5)) END > 1) (type: boolean) + Select Operator + expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: double), _col5 (type: double) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: double), CASE (_col5) WHEN (0) THEN (null) ELSE ((_col4 / _col5)) END (type: double) - outputColumnNames: _col1, _col2, _col3, _col5, _col6 + Filter Operator + predicate: (CASE (_col5) WHEN (0) THEN (0) ELSE ((_col4 / _col5)) END > 1) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Select Operator + expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: double), CASE (_col5) WHEN (0) THEN (null) ELSE ((_col4 / _col5)) END (type: double) + outputColumnNames: _col1, _col2, _col3, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: double), _col6 (type: double) + Reduce Output Operator + key expressions: _col2 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: int), _col5 (type: double), _col6 (type: double) Reducer 6 Reduce Operator Tree: Join Operator @@ -986,7 +1152,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31), Map 1 (PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 31), Map 3 (PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -1003,6 +1169,20 @@ STAGE PLANS: sort order: + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) + Map 3 + Map Operator Tree: + TableScan + alias: inventory + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: inv_date_sk (type: int), inv_item_sk (type: int), inv_quantity_on_hand (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Reducer 2 Reduce Operator Tree: Select Operator diff --git a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index aaea86d..fecfe0a 100644 --- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1247,8 +1247,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1261,47 +1260,30 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Group By Operator + aggregations: count(DISTINCT _col0) keys: _col0 (type: int) mode: hash - outputColumnNames: _col0 + outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: int) + aggregations: count(DISTINCT KEY._col0:0._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 1752000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col0) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized Stage: Stage-0 Fetch Operator