Details
Description
In an Impala cluster, we ran into a scenario that causes the Impala daemon to crash with a SIGSEGV. Reproducing the crash under gdb shows the thread's stack being exhausted by the following unbounded recursion:
#0 0x00007f983e031a1c in clock_gettime () #1 0x00007f983bfda0b5 in __GI___clock_gettime (clock_id=clock_id@entry=1, tp=0x7f967bd8b070) at ../sysdeps/unix/sysv/linux/clock_gettime.c:38 #2 0x00007f983c9f8e48 in kudu::Stopwatch::GetTimes (times=0x7f967bd8b1b0, this=<optimized out>, this=<optimized out>) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:294 #3 0x00007f983ca09829 in kudu::Stopwatch::stop (this=0x7f967bd8b320) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:218 #4 kudu::Stopwatch::stop (this=0x7f967bd8b320) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:213 #5 kudu::sw_internal::LogTiming::Print (max_expected_millis=50, this=0x7f967bd8b320) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:359 #6 kudu::sw_internal::LogTiming::~LogTiming (this=0x7f967bd8b320, __in_chrg=<optimized out>) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/stopwatch.h:329 #7 0x00007f983c9fe32c in kudu::client::internal::MetaCache::LookupEntryByKeyFastPath (this=<optimized out>, table=<optimized out>, partition_key=..., entry=0x7f967bd8b4c0) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/locks.h:99 #8 0x00007f983c9fe656 in kudu::client::internal::MetaCache::DoFastPathLookup (this=0xde431e0, table=0xf899300, partition_key=0x7f967bd8b700, lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1243 #9 0x00007f983ca05731 in kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) 
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1405 #10 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #11 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967bd8b8c0) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #12 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 #13 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) 
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #14 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967bd8bad0) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #15 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 #16 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #17 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967bd8bce0) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #18 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) 
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 #19 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #20 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967bd8bef0) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #21 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 #22 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) 
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #23 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967bd8c100) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #24 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 #25 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #26 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967bd8c310) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #27 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 ... continues ... 
#47617 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #47618 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967c589290) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #47619 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 #47620 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) 
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #47621 0x00007f983ca0575f in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0x7f967c5894a0) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #47622 kudu::client::internal::MetaCache::LookupTabletByKey(kudu::client::KuduTable const*, kudu::PartitionKey, kudu::MonoTime const&, kudu::client::internal::MetaCache::LookupType, scoped_refptr<kudu::client::internal::RemoteTablet>*, std::function<void (kudu::Status const&)> const&) (this=0xde431e0, table=0xf899300, partition_key=..., deadline=..., lookup_type=kudu::client::internal::MetaCache::LookupType::kPoint, remote_tablet=0x0, callback=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:1408 #47623 0x00007f983ca0598c in kudu::client::internal::MetaCacheServerPicker::PickLeader(std::function<void (kudu::Status const&, kudu::client::internal::RemoteTabletServer*)> const&, kudu::MonoTime const&) (this=0xdec0000, callback=..., deadline=...) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/common/partition.h:153 #47624 0x00007f983ca066a7 in std::function<void (kudu::Status const&)>::operator()(kudu::Status const&) const (__args#0=..., this=0xca50918) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #47625 kudu::client::internal::LookupRpc::SendRpcCb (this=0xca50800, status=...) 
at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/client/meta_cache.cc:966 #47626 0x00007f983c9db65c in kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB, kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}::operator()() const (this=<optimized out>, this=<optimized out>) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/status.h:230#47627 std::__invoke_impl<void, kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB, kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(std::__invoke_other, kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB, kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&) (__f=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:60 #47628 std::__invoke_r<void, kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB, kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&>(void&&, (kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB, kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}&)...) (__fn=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/invoke.h:110 #47629 std::_Function_handler<void (), kudu::client::internal::AsyncLeaderMasterRpc<kudu::master::GetTableLocationsRequestPB, kudu::master::GetTableLocationsResponsePB>::SendRpc()::{lambda()#1}>::_M_invoke(std::_Any_data const&) (__functor=...) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:291 #47630 0x00007f983cac860b in std::function<void ()>::operator()() const (this=0xee3f9c0) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #47631 kudu::rpc::OutboundCall::CallCallback (this=0xee3f840) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/outbound_call.cc:309 #47632 0x00007f983cabb763 in kudu::rpc::Connection::HandleCallResponse (this=0xcd00700, transfer=...) 
at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172 #47633 0x00007f983cabc215 in kudu::rpc::Connection::ReadHandler (this=0xcd00700, watcher=..., revents=<optimized out>) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/unique_ptr.h:172#47634 0x00007f983cdb3ffb in ev_invoke_pending (loop=0xcc99b00) at /mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3155 #47635 0x00007f983ca97cc8 in kudu::rpc::ReactorThread::InvokePendingCb (loop=0xcc99b00) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:202 #47636 0x00007f983cdb73f7 in ev_run (flags=0, loop=0xcc99b00) at /mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3555 #47637 ev_run (loop=0xcc99b00, flags=0) at /mnt/source/kudu/kudu-345fd44ca3/thirdparty/src/libev-4.20/ev.c:3402 #47638 0x00007f983ca98bd9 in ev::loop_ref::run (flags=0, this=0xef75be0) at /mnt/source/kudu/kudu-345fd44ca3/thirdparty/installed/uninstrumented/include/ev++.h:211#47639 kudu::rpc::ReactorThread::RunThread (this=0xef75bd8) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/rpc/reactor.cc:503 #47640 0x00007f983cc2d36c in std::function<void ()>::operator()() const (this=0xec68358) at /mnt/build/gcc-10.4.0/include/c++/10.4.0/bits/std_function.h:617 #47641 kudu::Thread::SuperviseThread (arg=0xec68300) at /mnt/source/kudu/kudu-345fd44ca3/src/kudu/util/thread.cc:691 #47642 0x00007f983dfec609 in start_thread (arg=<optimized out>) at pthread_create.c:477 #47643 0x00007f983c01c133 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
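The SIGSEGV is a stack overflow: MetaCache::LookupTabletByKey invokes its callback, which calls MetaCacheServerPicker::PickLeader, which in turn calls MetaCache::LookupTabletByKey again. This cycle repeats (more than 47,000 frames in the backtrace above) until the thread's stack is exhausted.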
Here are the steps to reproduce it from Impala:
/** 1. Create table **/
drop table if exists impala_crash;
create table if not exists impala_crash ( dt string, col string, primary key(dt) ) partition by range(dt) ( partition values <= '00000000' ) stored as kudu;

/** 2. alter and insert **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc'); /* normal */

/** 3. Run the same queries again and the Impala daemon crashes **/
alter table impala_crash drop if exists range partition value='20230301';
alter table impala_crash add if not exists range partition value='20230301';
insert into impala_crash values ('20230301','abc');