Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-4494

Crash in SimpleScheduler when restarting under load

    XML | Word | Printable | JSON

Details

    Description

      During startup, the scheduler can start scheduling queries before the local node has been registered as a backend through the statestore. If a query runs with exec_at_coord, it will fail to look up the local backend in the BackendConfig, and the scheduler will eventually crash.

      #0  0x00007f387dd5e5e5 in ?? () from sysroot/lib64/libc.so.6
      #1  0x00007f387dd5fdc5 in abort () from sysroot/lib64/libc.so.6
      #2  0x00007f387fcd5a55 in os::abort(bool) () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #3  0x00007f387fe55f87 in VMError::report_and_die() () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #4  0x00007f387fe5650e in crash_handler(int, siginfo*, void*) () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #5  0x00007f387fcd4bf2 in os::Linux::chained_handler(int, siginfo*, void*) () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #6  0x00007f387fcda8d6 in JVM_handle_linux_signal () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #7  <signal handler called>
      #8  0x00007f387fccc511 in os::is_first_C_frame(frame*) () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #9  0x00007f387fe5467d in VMError::report(outputStream*) () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #10 0x00007f387fe55b8a in VMError::report_and_die() () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #11 0x00007f387fcda96f in JVM_handle_linux_signal () from sysroot/usr/java/jdk1.7.0_67/jre/lib/amd64/server/libjvm.so
      #12 <signal handler called>
      #13 0x0000000000a93881 in impala::SimpleScheduler::AssignmentCtx::GetBackendRank(std::string const&) const ()
      #14 0x0000000000a989e3 in impala::SimpleScheduler::AssignmentCtx::RecordScanRangeAssignment(impala::TBackendDescriptor const&, int, std::vector<impala::TNetworkAddress, std::allocator<impala::TNetworkAddress> > const&, impala::TScanRangeLocations const&, boost::unordered::unordered_map<impala::TNetworkAddress, std::map<int, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> >, std::less<int>, std::allocator<std::pair<int const, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> > > > >, boost::hash<impala::TNetworkAddress>, std::equal_to<impala::TNetworkAddress>, std::allocator<std::pair<impala::TNetworkAddress const, std::map<int, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> >, std::less<int>, std::allocator<std::pair<int const, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> > > > > > > >*) ()
      #15 0x0000000000a9979a in impala::SimpleScheduler::ComputeScanRangeAssignment(impala::BackendConfig const&, int, impala::TReplicaPreference::type const*, bool, std::vector<impala::TScanRangeLocations, std::allocator<impala::TScanRangeLocations> > const&, std::vector<impala::TNetworkAddress, std::allocator<impala::TNetworkAddress> > const&, bool, impala::TQueryOptions const&, impala::RuntimeProfile::Counter*, boost::unordered::unordered_map<impala::TNetworkAddress, std::map<int, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> >, std::less<int>, std::allocator<std::pair<int const, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> > > > >, boost::hash<impala::TNetworkAddress>, std::equal_to<impala::TNetworkAddress>, std::allocator<std::pair<impala::TNetworkAddress const, std::map<int, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> >, std::less<int>, std::allocator<std::pair<int const, std::vector<impala::TScanRangeParams, std::allocator<impala::TScanRangeParams> > > > > > > >*) ()
      #16 0x0000000000a99b44 in impala::SimpleScheduler::ComputeScanRangeAssignment(impala::TQueryExecRequest const&, impala::QuerySchedule*) ()
      #17 0x0000000000a99ddf in impala::SimpleScheduler::Schedule(impala::Coordinator*, impala::QuerySchedule*) ()
      #18 0x0000000000b26178 in impala::ImpalaServer::QueryExecState::ExecQueryOrDmlRequest(impala::TQueryExecRequest const&) ()
      #19 0x0000000000b28d84 in impala::ImpalaServer::QueryExecState::Exec(impala::TExecRequest*) ()
      #20 0x0000000000ad4336 in impala::ImpalaServer::ExecuteInternal(impala::TQueryCtx const&, std::shared_ptr<impala::ImpalaServer::SessionState>, bool*, std::shared_ptr<impala::ImpalaServer::QueryExecState>*) ()
      #21 0x0000000000ad9ba8 in impala::ImpalaServer::Execute(impala::TQueryCtx*, std::shared_ptr<impala::ImpalaServer::SessionState>, std::shared_ptr<impala::ImpalaServer::QueryExecState>*) ()
      #22 0x0000000000b0834b in impala::ImpalaServer::ExecuteStatement(apache::hive::service::cli::thrift::TExecuteStatementResp&, apache::hive::service::cli::thrift::TExecuteStatementReq const&) ()
      #23 0x0000000000b453e0 in impala::ChildQuery::ExecAndFetch() ()
      #24 0x0000000000b1e973 in impala::ImpalaServer::QueryExecState::ExecChildQueries() ()
      #25 0x0000000000bf5ad9 in impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*) ()
      #26 0x0000000000bf6474 in boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::Promise<long>*> > > >::run() ()
      #27 0x0000000000e5c3aa in ?? ()
      #28 0x00007f387e0c7aa1 in start_thread () from sysroot/lib64/libpthread.so.0
      #29 0x00007f387de14aad in ?? () from sysroot/lib64/libc.so.6
      #30 0x0000000000000000 in ?? ()
      

      Attachments

        Issue Links

          Activity

            People

              lv Lars Volker
              lv Lars Volker
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved: