Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-9324

HdfsOrcScanner crashes in DCHECK failure in OrcSchemaResolver::BuildSchemaPath

Attach files | Attach Screenshot | Voters | Watch issue | Watchers | Create sub-task | Link | Clone | Update Comment Author | Replace String in Comment | Update Comment Visibility | Delete Comments
    XML | Word | Printable | JSON

Details

    • Bug
    • Status: Resolved
    • Blocker
    • Resolution: Fixed
    • None
    • Impala 3.4.0
    • Backend
    • None

    Description

      Hit a crash after running test_fuzz_scanners.py with the orc/def/block table format for 2 days.

      FATAL log:

      F0123 03:46:22.084527 15347 orc-metadata-utils.cc:44] 9b4d52b2f594f58b:687b5fe200000001] Check failed: paths->size() == node.getColumnId() (3 vs. 4)
      

      stacktrace:

      Crash reason:  SIGABRT
      Crash address: 0x3e8000033b3
      Process uptime: not available
      
      Thread 306 (crashed)
       0  libc-2.23.so + 0x35428
       1  libc-2.23.so + 0x3702a
       2  impalad!google_breakpad::ExceptionHandler::HandleSignal(int, siginfo_t*, void*) + 0x1e0
       3  impalad!google::DumpStackTraceAndExit() + 0x24
       4  impalad!google::LogMessage::Fail() + 0xd
       5  impalad!google::LogMessage::SendToLog() + 0x2b2
       6  impalad!google::LogMessage::Flush() + 0x157
       7  impalad!google::LogMessageFatal::~LogMessageFatal() + 0xe
       8  impalad!impala::OrcSchemaResolver::BuildSchemaPath(orc::Type const&, std::vector<int, std::allocator<int> >*, std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > >*) [orc-metadata-utils.cc : 44 + 0xf]
       9  impalad!impala::OrcSchemaResolver::BuildSchemaPaths(int, std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > >*) [orc-metadata-utils.cc : 36 + 0x4a]
      10  impalad!impala::HdfsOrcScanner::Open(impala::ScannerContext*) [hdfs-orc-scanner.cc : 187 + 0x51]
      11  impalad!impala::HdfsScanNodeBase::CreateAndOpenScannerHelper(impala::HdfsPartitionDescriptor*, impala::ScannerContext*, boost::scoped_ptr<impala::HdfsScanner>*) [hdfs-scan-node-base.cc : 819 + 0x29]
      12  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 494 + 0x2b]
      13  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 416 + 0x2a]
      14  impalad!impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::{lambda()#1}::operator()() const + 0x30
      15  impalad!boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::<lambda()>, void>::invoke [function_template.hpp : 153 + 0xc]
      16  impalad!boost::function0<void>::operator()() const [function_template.hpp : 767 + 0x11]
      17  impalad!impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
      18  impalad!void boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 525 + 0x15]
      19  impalad!boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() [bind_template.hpp : 20 + 0x22]
      20  impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() [thread.hpp : 116 + 0x12]
      21  impalad!thread_proxy + 0xda
      22  libpthread-2.23.so + 0x76ba
      23  libc-2.23.so + 0x10741d
      

      Code snippet

       42 void OrcSchemaResolver::BuildSchemaPath(const orc::Type& node, SchemaPath* path,
       43     vector<SchemaPath>* paths) {
       44   DCHECK_EQ(paths->size(), node.getColumnId());
       45   paths->push_back(*path);
       46   if (node.getKind() == orc::TypeKind::STRUCT) {
       47     int size = node.getSubtypeCount();
       48     for (int i = 0; i < size; ++i) {
       49       path->push_back(i);
       50       const orc::Type* child = node.getSubtype(i);
       51       BuildSchemaPath(*child, path, paths);
       52       path->pop_back();
       53     }
       54   } else if (node.getKind() == orc::TypeKind::LIST) {
       55     DCHECK_EQ(node.getSubtypeCount(), 1);
       56     const orc::Type* child = node.getSubtype(0);
       57     path->push_back(SchemaPathConstants::ARRAY_ITEM);
       58     BuildSchemaPath(*child, path, paths);
       59     path->pop_back();
       60   } else if (node.getKind() == orc::TypeKind::MAP) {
       61     DCHECK_EQ(node.getSubtypeCount(), 2);
       62     const orc::Type* key_child = node.getSubtype(0);
       63     const orc::Type* value_child = node.getSubtype(1);
       64     path->push_back(SchemaPathConstants::MAP_KEY);
       65     BuildSchemaPath(*key_child, path, paths);
       66     (*path)[path->size() - 1] = SchemaPathConstants::MAP_VALUE;
       67     BuildSchemaPath(*value_child, path, paths);
       68     path->pop_back();
       69   }
       70 }
      
      

      Reproduce

      CREATE EXTERNAL TABLE my_complextypes_tbl (
        id BIGINT,
        int_array ARRAY<INT>,
        int_array_array ARRAY<ARRAY<INT>>,
        int_map MAP<STRING,INT>,
        int_map_array ARRAY<MAP<STRING,INT>>,
        nested_struct STRUCT<a:INT,b:ARRAY<INT>,c:STRUCT<d:ARRAY<ARRAY<STRUCT<e:INT,f:STRING>>>>,g:MAP<STRING,STRUCT<h:STRUCT<i:ARRAY<DOUBLE>>>>>
      )
      STORED AS ORC;
      

      Load the attached corrupt ORC file into this table and run:

      select * from my_complextypes_tbl;
      

      Attachments

        Issue Links

        Activity

          This comment will be Viewable by All Users
          Cancel

          People

            stigahuang Quanlong Huang
            stigahuang Quanlong Huang
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Slack

                Issue deployment