Details
-
Bug
-
Status: Resolved
-
Critical
-
Resolution: Fixed
-
6.0.0
-
R version 4.0.4
Description
# Reproduction script: left-joining an Arrow Table with a Dataset opened from
# Parquet files and then aggregating. Per the report, the in-memory Table
# variant works while the Dataset variants crash the R session.
library(tidyverse)
library(arrow)

# Build the inputs -------------------------------------------------------------
car_info <- rownames_to_column(mtcars, "car_info")
cars_arrow_table <- arrow_table(car_info)

# Keep only the first column (`car_info`) and attach one randomly sampled
# colour per row; the result is converted to an Arrow Table.
other_mtcars_data <- select(car_info, 1) %>%
  mutate(
    main_color = sample(
      c("red", "blue", "white", "black"),
      size = n(),
      replace = TRUE
    )
  ) %>%
  arrow::arrow_table()

# Write the data as a dataset under the session temp directory and reopen it.
# file.path() is used instead of pasting a hard-coded "\\" so the path is a
# real subdirectory on every platform, not only on Windows.
temp <- tempdir()
par_temp <- file.path(temp, "parquet")
car_info %>%
  arrow::write_dataset(par_temp)
cars_arrow <- arrow::open_dataset(par_temp)

# Using arrow tables works -----------------------------------------------------
cars_arrow_table %>%
  left_join(other_mtcars_data) %>%
  count(main_color) %>%
  collect()

# Using open_dataset() crashes R -----------------------------------------------
other_mtcars_data %>%
  left_join(cars_arrow) %>%
  count(main_color) %>%
  collect()

# Other variations also crash
cars_arrow %>%
  left_join(other_mtcars_data) %>%
  count(main_color) %>%
  collect()

cars_arrow %>%
  left_join(other_mtcars_data) %>%
  group_by(main_color) %>%
  summarise(n = n()) %>%
  collect()

# compute() also crashes
cars_arrow %>%
  left_join(other_mtcars_data) %>%
  count(main_color) %>%
  compute()

# Workaround with duckdb -------------------------------------------------------
# This works: convert both sides to duckdb before joining.
cars_duck <- to_duckdb(cars_arrow, auto_disconnect = TRUE)
other_cars_duck <- to_duckdb(other_mtcars_data, auto_disconnect = TRUE)

cars_duck %>%
  left_join(other_cars_duck) %>%
  count(main_color) %>%
  collect()

# This doesn't (the reporter was unsure whether it is expected to work):
# joining in Arrow first and converting the result to duckdb afterwards.
cars_arrow %>%
  left_join(other_mtcars_data) %>%
  to_duckdb()
Attachments
Issue Links
- is duplicated by
-
ARROW-15397 [R] Problem with Join in apache arrow in R
- Closed
- relates to
-
ARROW-15718 [R] Joining two datasets crashes if use_threads=FALSE
- Resolved
- links to