Uploaded image for project: 'Apache Arrow'
  1. Apache Arrow
  2. ARROW-12668

[C++][Dataset] CountRows occasionally segfaulting

    Export: XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Major
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: 5.0.0
    • Component/s: C++

    Description

      https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312525

      Start test: dim() correctly determine numbers of rows and columns on arrow_dplyr_query object
      
       *** caught segfault ***
      address 0x7ff7cf2cf8f8, cause 'invalid permissions'
      
      Traceback:
       1: dataset___Scanner__CountRows(self)
       2: scanner$CountRows()
       3: dim.arrow_dplyr_query(.)
       4: dim(.)
       5: ds %>% filter(chr == "a") %>% dim()
       6: eval_bare(expr, quo_get_env(quo))
       7: quasi_label(enquo(object), label, arg = "object")
       8: expect_identical(ds %>% filter(chr == "a") %>% dim(), c(2L, 7L))
       9: eval(code, test_env)
      10: eval(code, test_env)
      11: withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error)
      12: doTryCatch(return(expr), name, parentenv, handler)
      13: tryCatchOne(expr, names, parentenv, handlers[[1L]])
      14: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
      15: doTryCatch(return(expr), name, parentenv, handler)
      16: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),     names[nh], parentenv, handlers[[nh]])
      17: tryCatchList(expr, classes, parentenv, handlers)
      18: tryCatch(withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error), error = handle_fatal,     skip = function(e) {    })
      19: test_code(desc, code, env = parent.frame(), reporter = reporter)
      20: testthat::test_that(what, {    skip_if(getOption("..skip.tests", TRUE), "arrow C++ library not available")    code})
      21: test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_query object",     {        skip_if_not_available("parquet")        ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))        expect_identical(ds %>% filter(chr == "a") %>% dim(),             c(2L, 7L))        expect_equal(ds %>% select(chr, fct, int) %>% dim(),             c(20L, 3L))        expect_identical(ds %>% select(chr, fct, int) %>% filter(chr ==             "a") %>% dim(), c(2L, 3L))    })
      22: eval(code, test_env)
      23: eval(code, test_env)
      24: withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error)
      25: doTryCatch(return(expr), name, parentenv, handler)
      26: tryCatchOne(expr, names, parentenv, handlers[[1L]])
      27: tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
      28: doTryCatch(return(expr), name, parentenv, handler)
      29: tryCatchOne(tryCatchList(expr, names[-nh], parentenv, handlers[-nh]),     names[nh], parentenv, handlers[[nh]])
      30: tryCatchList(expr, classes, parentenv, handlers)
      31: tryCatch(withCallingHandlers({    eval(code, test_env)    if (!handled && !is.null(test)) {        skip_empty()    }}, expectation = handle_expectation, skip = handle_skip, warning = handle_warning,     message = handle_message, error = handle_error), error = handle_fatal,     skip = function(e) {    })
      32: test_code(NULL, exprs, env)
      33: source_file(path, child_env(env), wrap = wrap)
      34: FUN(X[[i]], ...)
      35: lapply(test_paths, test_one_file, env = env, wrap = wrap)
      36: force(code)
      37: doWithOneRestart(return(expr), restart)
      38: withOneRestart(expr, restarts[[1L]])
      39: withRestarts(testthat_abort_reporter = function() NULL, force(code))
      40: with_reporter(reporters$multi, lapply(test_paths, test_one_file,     env = env, wrap = wrap))
      41: test_files(test_dir = test_dir, test_package = test_package,     test_paths = test_paths, load_helpers = load_helpers, reporter = reporter,     env = env, stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning,     wrap = wrap, load_package = load_package)
      42: test_files(test_dir = path, test_paths = test_paths, test_package = package,     reporter = reporter, load_helpers = load_helpers, env = env,     stop_on_failure = stop_on_failure, stop_on_warning = stop_on_warning,     wrap = wrap, load_package = load_package, parallel = parallel)
      43: test_dir("testthat", package = package, reporter = reporter,     ..., load_package = "installed")
      44: test_check("arrow", reporter = arrow_reporter)
      An irrecoverable exception occurred. R is aborting now ...

      The same test also intermittently returns the wrong row count instead of crashing (see https://github.com/apache/arrow/pull/9656/checks?check_run_id=2518312803):

      == Failed tests ================================================================
      -- Failure (test-dataset.R:148:3): dim() correctly determine numbers of rows and columns on arrow_dplyr_query object --
      ds %>% filter(chr == "a") %>% dim() not identical to c(2L, 7L).
      1/2 mismatches
      [1] 1 - 2 == -1
       

      Attachments

        Issue Links

          Activity

            People

              Assignee: David Li (lidavidm)
              Reporter: David Li (lidavidm)
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0h
                  0h
                  Logged:
                  Time Spent - 10m
                  10m