Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-6389

Crash when querying table with "\0" as a row delimiter

    Details

    • Type: Bug
    • Status: Closed
    • Priority: Blocker
    • Resolution: Fixed
    • Affects Version/s: Impala 2.8.0, Impala 2.9.0, Impala 2.10.0, Impala 2.11.0
    • Fix Version/s: Impala 2.13.0
    • Component/s: Backend
    • Labels:
    • Docs Text:
      Workaround: Use an alternative delimiter (e.g. \001).
    • Target Version:
    • Epic Color:
      ghx-label-2

      Description

      A user reported this bug here: http://community.cloudera.com/t5/Interactive-Short-cycle-SQL/Impala-quot-Cancelled-due-to-unreachable-impalad-s-quot-when/m-p/63577#M4044?eid=1&aid=1

      The following sequence causes an impalad crash:

      create table tab_separated(id bigint, s string, n int, t timestamp, b boolean)
        row format delimited
        fields terminated by '\t' escaped by '\\' lines terminated by '\000'
        stored as textfile;
      select * from tab_separated; -- Done. 0 results.
      insert into tab_separated (id, s) values (100, ''); -- Success.
      select * from tab_separated; -- 20 second delay before getting "Cancelled due to unreachable impalad(s): xxxx:22000"
      
      (gdb) bt
      #0  0x00007f90414371f7 in raise () from /lib64/libc.so.6
      #1  0x00007f90414388e8 in abort () from /lib64/libc.so.6
      #2  0x00007f9044105185 in os::abort(bool) () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
      #3  0x00007f90442a7593 in VMError::report_and_die() () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
      #4  0x00007f904410a68f in JVM_handle_linux_signal () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
      #5  0x00007f9044100be3 in signalHandler(int, siginfo*, void*) () from /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
      #6  <signal handler called>
      #7  0x00007f904154e574 in __memcpy_ssse3_back () from /lib64/libc.so.6
      #8  0x0000000000dc01a0 in impala::StringBuffer::Append (this=this@entry=0x723c5c0, str=0x722a00f "\025\b\034\030\022William S. Pollard\030\016Adolfo A. Lieb\026", str_len=-1)
          at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/runtime/string-buffer.h:54
      #9  0x0000000000dbdc05 in impala::HdfsTextScanner::ProcessRange (this=this@entry=0x723c400, row_batch=row_batch@entry=0x74157a0, num_tuples=num_tuples@entry=0x7f8fe4f3821c)
          at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-text-scanner.cc:397
      #10 0x0000000000dbfdeb in impala::HdfsTextScanner::GetNextInternal (this=0x723c400, row_batch=0x74157a0)
          at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-text-scanner.cc:446
      #11 0x0000000000d9d18d in impala::HdfsScanner::ProcessSplit (this=0x723c400) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scanner.cc:121
      #12 0x0000000000d8e1d9 in impala::HdfsScanNode::ProcessSplit (this=0x5496300, filter_ctxs=..., expr_results_pool=0x8000, expr_results_pool@entry=0x7f8fe4f38780, 
          scan_range=0x64c8000) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scan-node.cc:532
      #13 0x0000000000d8fd65 in impala::HdfsScanNode::ScannerThread (this=0x5496300) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scan-node.cc:442
      #14 0x0000000000d90262 in operator() (__closure=0x7f8fe4f38c68) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/exec/hdfs-scan-node.cc:354
      #15 boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourceMgr::ResourcePool*)::<lambda()>, void>::invoke(boost::detail::function::function_buffer &) (function_obj_ptr=...) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:153
      #16 0x0000000000d19db3 in operator() (this=0x7f8fe4f38c60)
          at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/function/function_template.hpp:767
      #17 impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*) (name=..., category=..., functor=..., 
          thread_started=<optimized out>) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/be/src/util/thread.cc:356
      #18 0x0000000000d1a544 in operator()<void (*)(const std::basic_string<char>&, const std::basic_string<char>&, boost::function<void()>, impala::Promise<long int>*), boost::_bi::list0> (f=@0x7f705b8: 0xd19b20 <impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*)>, a=<synthetic pointer>, 
          this=0x7f705c0) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/bind/bind.hpp:457
      #19 operator() (this=0x7f705b8) at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/bind/bind_template.hpp:20
      #20 boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::Promise<long>*> > > >::run() (this=0x7f70400)
          at /usr/src/debug/impala-2.11.0-cdh5.15.0-SNAPSHOT/toolchain/boost-1.57.0-p3/include/boost/thread/detail/thread.hpp:116
      #21 0x00000000012ad78a in thread_proxy ()
      #22 0x00007f90417cce25 in start_thread () from /lib64/libpthread.so.0
      #23 0x00007f90414fa34d in clone () from /lib64/libc.so.6
      

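      It looks like col_start and byte_buffer_ptr_ are inconsistent: as the gdb output below shows, col_start (0x722a00f) points one byte past byte_buffer_ptr_ (0x722a00e). That is consistent with the str_len=-1 passed to StringBuffer::Append in frame #8. The negative length is then passed into memcpy(), where it is converted to a huge unsigned number.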

      (gdb) p col_start
      $1 = 0x722a00f "\025\b\034\030\022William S. Pollard\030\016Adolfo A. Lieb\026"
      (gdb) p byte_buffer_ptr_
      $2 = 0x722a00e "\006\025\b\034\030\022William S. Pollard\030\016Adolfo A. Lieb\026"
      

      The strings shown above are test data from the environment we were running on, not actual table data.

      Hive itself apparently does not support tables like this, at least for now (see HIVE-5999).

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                zamsden Zach Amsden
                Reporter:
                tarmstrong Tim Armstrong
              • Votes:
                0 Vote for this issue
                Watchers:
                3 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: