Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-9277

Crash due to unhandled exception thrown from orc::ColumnSelector::updateSelectedByTypeId

Attach filesAttach ScreenshotVotersWatch issueWatchersCreate sub-taskLinkCloneUpdate Comment AuthorReplace String in CommentUpdate Comment VisibilityDelete Comments
    XMLWordPrintableJSON

Details

    • Bug
    • Status: Resolved
    • Blocker
    • Resolution: Fixed
    • None
    • Impala 3.4.0
    • None
    • None

    Description

      Build the latest Impala with the latest ORC library and run test_fuzz_scanner for the ORC format:

      • Impala git hash: 497a17dbdc0669abd47c2360b8ca94de8b54d413
      • ORC git hash: c26ff4c351d7c34c4272442a6874703f510282a8

      Found the crash:

      Operating system: Linux
                        0.0.0 Linux 4.15.0-72-generic #81~16.04.1-Ubuntu SMP Tue Nov 26 16:34:21 UTC 2019 x86_64
      CPU: amd64
           family 6 model 158 stepping 10
           1 CPU
      
      GPU: UNKNOWN
      
      Crash reason:  SIGABRT
      Crash address: 0x3e8000048f0
      Process uptime: not available
      
      Thread 319 (crashed)
       0  libc-2.23.so + 0x35428
       1  libc-2.23.so + 0x3702a
       2  impalad!_fini + 0x15bae90
       3  libc-2.23.so + 0x79242
       4  libc-2.23.so + 0x79242
       5  libstdc++.so.6.0.21 + 0x8c880
       6  libstdc++.so.6.0.21 + 0x8f84d
       7  impalad!_fini + 0x15baeb0
       8  impalad + 0x4b984e0
       9  libstdc++.so.6.0.21 + 0x8d6b6
      10  libstdc++.so.6.0.21 + 0x8d701
      11  libstdc++.so.6.0.21 + 0x8d919
      12  impalad!orc::ColumnSelector::updateSelectedByTypeId(std::vector<bool, std::allocator<bool> >&, unsigned long) [Reader.cc : 166 + 0x12]
      13  impalad!orc::ColumnSelector::updateSelected(std::vector<bool, std::allocator<bool> >&, orc::RowReaderOptions const&) [Reader.cc : 136 + 0xf]
      14  impalad!orc::RowReaderImpl::RowReaderImpl(std::shared_ptr<orc::FileContents>, orc::RowReaderOptions const&) [Reader.cc : 229 + 0x11]
      15  impalad!orc::ReaderImpl::createRowReader(orc::RowReaderOptions const&) const [Reader.cc : 725 + 0x1b]
      16  impalad!impala::HdfsOrcScanner::Open(impala::ScannerContext*) [hdfs-orc-scanner.cc : 198 + 0x3c]
      17  impalad!impala::HdfsScanNodeBase::CreateAndOpenScannerHelper(impala::HdfsPartitionDescriptor*, impala::ScannerContext*, boost::scoped_ptr<impala::HdfsScanner>*) [hdfs-scan-node-base.cc : 819 + 0x29]
      18  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 494 + 0x2b]
      19  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 416 + 0x2a]
      20  impalad!impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::{lambda()#1}::operator()() const + 0x30
      21  impalad!boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::<lambda()>, void>::invoke [function_template.hpp : 153 + 0xc]
      22  impalad!boost::function0<void>::operator()() const [function_template.hpp : 767 + 0x11]
      23  impalad!impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
      24  impalad!void boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 525 + 0x15]
      25  impalad!boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() [bind_template.hpp : 20 + 0x22]
      26  impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() [thread.hpp : 116 + 0x12]
      27  impalad!thread_proxy + 0xda
      28  libpthread-2.23.so + 0x76ba
      29  libc-2.23.so + 0x10741d
      

      Code snippet for ORC Reader.cc:166

      158  void ColumnSelector::updateSelectedByTypeId(std::vector<bool>& selectedColumns, uint64_t typeId) {
      159    if (typeId < selectedColumns.size()) {
      160      const Type& type = *idTypeMap[typeId];
      161      selectChildren(selectedColumns, type);
      162    } else {
      163      std::stringstream buffer;
      164      buffer << "Invalid type id selected " << typeId << " out of "
      165             << selectedColumns.size();
      166      throw ParseError(buffer.str());
      167    }
      168  }
      

      Code snippet for Impala hdfs-orc-scanner.cc:198

      197  unique_ptr<orc::RowReader> tmp_row_reader =
      198      reader_->createRowReader(row_reader_options_);
      199  const orc::Type* root_type = &tmp_row_reader->getSelectedType();
      200  DCHECK_EQ(root_type->getKind(), orc::TypeKind::STRUCT);
      201  orc_root_reader_ = this->obj_pool_.Add(
      202      new OrcStructReader(root_type, scan_node_->tuple_desc(), this));
      

      How to reproduce
      The query is

      select count(*) from test_fuzz_nested_types_4f03937d.complextypestbl q

      CreateTable DDL:

      CREATE TABLE test_fuzz_nested_types_4f03937d.complextypestbl (
        id BIGINT,
        int_array ARRAY<INT>,
        int_array_array ARRAY<ARRAY<INT>>,
        int_map MAP<STRING,INT>,
        int_map_array ARRAY<MAP<STRING,INT>>,
        nested_struct STRUCT<a:INT,b:ARRAY<INT>,c:STRUCT<d:ARRAY<ARRAY<STRUCT<e:INT,f:STRING>>>>,g:MAP<STRING,STRUCT<h:STRUCT<i:ARRAY<DOUBLE>>>>>
      )
      STORED AS ORC
      

      The malformed ORC file is attached.

      Attachments

        Issue Links

        Activity

          This comment will be Viewable by All Users Viewable by All Users
          Cancel

          People

            boroknagyz Zoltán Borók-Nagy
            stigahuang Quanlong Huang
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Slack

                Issue deployment