Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-9297

HdfsOrcScanner crash in orc::RleDecoderV2::next

    XML | Word | Printable | JSON

    Details

    • Type: Bug
    • Status: Resolved
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: None
    • Fix Version/s: Impala 3.4.0
    • Component/s: Backend
    • Labels:
      None

      Description

      Hit a crash when running the fuzz_scanners test for ORC.

      Operating system: Linux
                        0.0.0 Linux 4.15.0-72-generic #81~16.04.1-Ubuntu SMP Tue Nov 26 16:34:21 UTC 2019 x86_64
      CPU: amd64
           family 6 model 158 stepping 10
           1 CPU
      
      GPU: UNKNOWN
      
      Crash reason:  SIGSEGV
      Crash address: 0x0
      Process uptime: not available
      
      Thread 313 (crashed)
       0  impalad!orc::RleDecoderV2::next(long*, unsigned long, char const*) [RLEv2.hh : 167 + 0xa]
       1  impalad!orc::StringDictionaryColumnReader::StringDictionaryColumnReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 581 + 0x19]
       2  impalad!orc::buildReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 1756 + 0x1b]
       3  impalad!orc::StructColumnReader::StructColumnReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 876 + 0x10]
       4  impalad!orc::buildReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 1787 + 0x1b]
       5  impalad!orc::RowReaderImpl::startNextStripe() [Reader.cc : 917 + 0x12]
       6  impalad!orc::RowReaderImpl::next(orc::ColumnVectorBatch&) [Reader.cc : 932 + 0x5]
       7  impalad!impala::HdfsOrcScanner::AssembleRows(impala::RowBatch*) [hdfs-orc-scanner.cc : 618 + 0x40]
       8  impalad!impala::HdfsOrcScanner::GetNextInternal(impala::RowBatch*) [hdfs-orc-scanner.cc : 516 + 0x20]
       9  impalad!impala::HdfsOrcScanner::ProcessSplit() [hdfs-orc-scanner.cc : 435 + 0x39]
      10  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 514 + 0x28]
      11  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 416 + 0x2a]
      12  impalad!impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::{lambda()#1}::operator()() const + 0x30
      13  impalad!boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::<lambda()>, void>::invoke [function_template.hpp : 153 + 0xc]
      14  impalad!boost::function0<void>::operator()() const [function_template.hpp : 767 + 0x11]
      15  impalad!impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
      16  impalad!void boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 525 + 0x15]
      17  impalad!boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() [bind_template.hpp : 20 + 0x22]
      18  impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() [thread.hpp : 116 + 0x12]
      19  impalad!thread_proxy + 0xda
      20  libpthread-2.23.so + 0x76ba
      21  libc-2.23.so + 0x10741d
      

      How to reproduce
      Build Impala using the latest ORC lib. Doc: https://cwiki.apache.org/confluence/display/IMPALA/Compile+Impala+with+the+latest+ORC+library

      • Impala git-hash: d66610837e53965cb969b78116aec58164bb8548
      • ORC git-hash: 2b012e5a7beb1b688839566522191a8489adb944

      Create the table and load the corrupt ORC file (attached) into it.

      CREATE TABLE alltypes_RleDecoderV2_next_crash (
        id INT COMMENT 'Add a comment',                  
        bool_col BOOLEAN,                                
        tinyint_col TINYINT,                             
        smallint_col SMALLINT,                           
        int_col INT,                                     
        bigint_col BIGINT,                               
        float_col FLOAT,                                 
        double_col DOUBLE,                               
        date_string_col STRING,                          
        string_col STRING,                               
        timestamp_col TIMESTAMP                          
      )                                                   
      STORED AS ORC;
      

      Run the query:

      select count(*) from (select distinct * from alltypes_RleDecoderV2_next_crash) q;
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                boroknagyz Zoltán Borók-Nagy
                Reporter:
                stigahuang Quanlong Huang
              • Votes:
                0 Vote for this issue
                Watchers:
                2 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: