Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-12123

SIGSEGV in ScanRange::ReadSubRanges() when using HDFS caching

    Export: XML | Word | Printable | JSON

Details

    • Type: Bug
    • Status: Resolved
    • Priority: Critical
    • Resolution: Fixed
    • Affects Version/s: Impala 4.0.0, Impala 3.4.0, Impala 3.4.1, Impala 4.1.0, Impala 4.2.0, Impala 4.1.1, Impala 4.1.2, Impala 4.3.0
    • Fix Version/s: Impala 4.3.0
    • Component/s: Backend
    • Labels: None

    Description

      We have seen a crash where multiple executors hit this SIGSEGV simultaneously:

       

      #0  0x00007f42a5112cb5 in ?? ()
      #1  0x0000000001742dab in impala::io::ScanRange::ReadSubRanges (this=this@entry=0x9d1bc940, queue=queue@entry=0x11c700a0, buffer_desc=buffer_desc@entry=0x6f73f2c0, eof=eof@entry=0x7f39bbddb727) at scan-range.cc:275
      #2  0x000000000174550b in impala::io::ScanRange::DoRead (this=this@entry=0x9d1bc940, queue=queue@entry=0x11c700a0, disk_id=7) at scan-range.cc:219
      #3  0x000000000173a0d6 in impala::io::DiskQueue::DiskThreadLoop (this=0x11c700a0, io_mgr=0x1273e8c0) at disk-io-mgr.cc:504
      #4  0x00000000014b0355 in boost::function0<void>::operator() (this=0x7f39bbddbb40) at ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/function/function_template.hpp:770
      #5  impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) (name=..., category=..., functor=..., parent_thread_info=<optimized out>, thread_started=0x7ffea5eca350) at thread.cc:360
      #6  0x00000000014b171b in boost::_bi::list5<boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) (
          a=<synthetic pointer>..., f=<error reading variable>, this=0x13b30800) at ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/bind/bind.hpp:531
      #7  boost::_bi::bind_t<void, void (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() (this=0x13b307f8)
          at ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/bind/bind.hpp:1222
      #8  boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() (this=0x13b30640)
          at ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/thread/detail/thread.hpp:116
      #9  0x0000000001caf602 in thread_proxy ()
      #10 0x00007f42a8420ea5 in ?? ()
      #11 0x0000000000000000 in ?? ()

      The error reported is:

       

       

      Crash reason:  SIGSEGV
      Crash address: 0x7f3903c7f438
      Process uptime: not available

      We are working on finding details about the query that hit this.

       

      Frame #1 (scan-range.cc:275) corresponds to the memcpy call marked "<<< HERE" in this code:

       

      Status ScanRange::ReadSubRanges(
          DiskQueue* queue, BufferDescriptor* buffer_desc, bool* eof, FileReader* file_reader) {
        buffer_desc->len_ = 0;
        while (buffer_desc->len() < buffer_desc->buffer_len()
            && sub_range_pos_.index < sub_ranges_.size()) {
          SubRange& sub_range = sub_ranges_[sub_range_pos_.index];
          int64_t offset = sub_range.offset + sub_range_pos_.bytes_read;
          int64_t bytes_to_read = min(sub_range.length -sub_range_pos_.bytes_read,
              buffer_desc->buffer_len() - buffer_desc->len());
          if (cache_.data != nullptr) {
            memcpy(buffer_desc->buffer_ + buffer_desc->len(),
                cache_.data + offset, bytes_to_read); <<< HERE
          } else {

      This is the code path that reads from the HDFS cache (the cache_.data != nullptr branch).

       

      Attachments

        Activity

          People

            Assignee: joemcdonnell Joe McDonnell
            Reporter: joemcdonnell Joe McDonnell
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: