Uploaded image for project: 'Mesos'
  1. Mesos
  2. MESOS-7065

Deadlock is possible when a pointer to an actor is captured in this actor's callback.

    XMLWordPrintableJSON

    Details

      Description

      A deadlock is triggered if a callback of the actor A a contains a shared_ptr to itself. In this case, if the last copy of shared_ptr<A> is held by the callback, clearAllCallbacks() called on a's context will try to terminate a and will never succeed.

      I've put up a test demonstrating a deadlock triggered by removing callbacks from a satisfied future:

      #include <iostream>
      #include <process/clock.hpp>
      #include <process/future.hpp>
      #include <process/id.hpp>
      #include <process/process.hpp>
      #include <process/shared.hpp>
      #include <stout/os.hpp>
      
      class SatisfierProcess : public process::Process<SatisfierProcess>
      {
      public:
        SatisfierProcess() : ProcessBase(process::ID::generate("__satisfier__")) {}
        virtual void finalize() { promise.discard(); }
       process::Future<Nothing> future() { return promise.future(); }
      
        void satisfy()
        {
          std::cout << " >> SatisfierProcess: satisfying the promise" << std::endl;
          promise.set(Nothing());
        }
      
        process::Promise<Nothing> promise;
      };
      
      class Satisfier
      {
      public:
        Satisfier()
        {
          process = new SatisfierProcess();
          process::spawn(process);
        }
      
        ~Satisfier()
        {
          std::cout << " > ~Satisfier()" << std::endl;
          process::terminate(process);
          process::wait(process);
          delete process;
        }
      
        process::Future<Nothing> future() const { return dispatch(process, &SatisfierProcess::future); }
        void satisfy() const { dispatch(process->self(), &SatisfierProcess::satisfy); }
      
        SatisfierProcess* process;
      };
      
      
      TEST(CircularDependencyTest, FutureCallbackRemovalTriggersDeadlock)
      {
        {
          // This shared pointer will go out of scope and will
          // only be referenced from the `.onAny` callback.
          process::Shared<Satisfier> s(new Satisfier);
      
          s->future()
            // Callback with a circular dependency. When it finishes,
            // the only reference to `s` is in the callback itself.
            .onAny(process::defer([s](const Future<Nothing>&) {
              std::cout << " | First callback finished" << std::endl;
              return;
            }))
            // Callback that takes some time to process. Note the absence
            // of `defer` here: this callback is executed in the same context
            // where the promise was set, i.e. `s->process->self()` and ensures
            // the first callback has already finished when `clearAllCallbacks`
            // is called. `clearAllCallbacks` removes the last reference to the
            // `Satisfier` instance and hence calls its d-tor, i.e. `Satisfier`'s
            // d-tor is called from the `Satisfier` context => deadlock.
            .onAny([](const Future<Nothing>&) {
              std::cout << " | Second callback started" << std::endl;
              os::sleep(Seconds(2));
              std::cout << " | Second callback finished" << std::endl;
              return;
            });
      
          s->satisfy();
        }
      
        // Wait for all actors to process messages in their queues. Hint:
        // this will not happen because one actor is waits on himself.
        process::Clock::pause();
        process::Clock::settle();
      }
      

      The output of the test looks like:

      [ RUN      ] CircularDependencyTest.FutureCallbackRemovalTriggersDeadlock
       >> SatisfierProcess: satisfying the promise
       | Second callback started
       | First callback finished
       | Second callback finished
       > ~Satisfier()
      
      **** DEADLOCK DETECTED! ****
      You are waiting on process __satisfier__(1)@192.168.9.40:61447 that it is currently executing.
      *** Aborted at 1485916555 (unix time) try "date -d @1485916555" if you are using GNU date ***
      PC: @        0x10c2479a8 process::Future<>::Future()
      *** SIGSEGV (@0xdeb5) received by PID 39047 (TID 0x70000028d000) stack trace: ***
          @     0x7fff85acc52a _sigtramp
          @     0x7fff6754a5c8 (unknown)
          @        0x10c24795d process::Future<>::Future()
          @        0x10c246990 process::Promise<>::future()
          @        0x10f429a0d process::wait()
          @        0x106f062cf process::wait()
          @        0x1075a4b65 mesos::internal::tests::Satisfier::~Satisfier()
          @        0x1075a1c15 mesos::internal::tests::Satisfier::~Satisfier()
          @        0x1075a1b1d process::Shared<>::Data::~Data()
          @        0x1075a19f5 process::Shared<>::Data::~Data()
          @        0x1075a18f1 std::__1::__shared_ptr_pointer<>::__on_zero_shared()
          @     0x7fff8515fcb8 std::__1::__shared_weak_count::__release_shared()
          @        0x106f071bc std::__1::shared_ptr<>::~shared_ptr()
          @        0x106f07185 std::__1::shared_ptr<>::~shared_ptr()
          @        0x1075971c5 process::Shared<>::~Shared()
          @        0x107590ce5 process::Shared<>::~Shared()
          @        0x107586c25 mesos::internal::tests::CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test::TestBody()::$_0::~TestBody()
          @        0x107586bb5 mesos::internal::tests::CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test::TestBody()::$_0::~TestBody()
          @        0x10758b80c _ZZNK7process9_DeferredIZN5mesos8internal5tests65CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test8TestBodyEvE3$_0EcvNSt3__18functionIFvT_EEEIRKNS_6FutureI7NothingEEEEvENUlSH_E_D2Ev
          @        0x107587175 _ZZNK7process9_DeferredIZN5mesos8internal5tests65CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test8TestBodyEvE3$_0EcvNSt3__18functionIFvT_EEEIRKNS_6FutureI7NothingEEEEvENUlSH_E_D1Ev
          @        0x107589c95 _ZNSt3__128__libcpp_compressed_pair_impIZNK7process9_DeferredIZN5mesos8internal5tests65CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test8TestBodyEvE3$_0EcvNS_8functionIFvT_EEEIRKNS1_6FutureI7NothingEEEEvEUlSI_E_NS_9allocatorISJ_EELj2EED2Ev
          @        0x107589c75 _ZNSt3__117__compressed_pairIZNK7process9_DeferredIZN5mesos8internal5tests65CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test8TestBodyEvE3$_0EcvNS_8functionIFvT_EEEIRKNS1_6FutureI7NothingEEEEvEUlSI_E_NS_9allocatorISJ_EEED2Ev
          @        0x107589c55 _ZNSt3__117__compressed_pairIZNK7process9_DeferredIZN5mesos8internal5tests65CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test8TestBodyEvE3$_0EcvNS_8functionIFvT_EEEIRKNS1_6FutureI7NothingEEEEvEUlSI_E_NS_9allocatorISJ_EEED1Ev
          @        0x107589a44 _ZNSt3__110__function6__funcIZNK7process9_DeferredIZN5mesos8internal5tests65CircularDependencyTest_FutureCallbackRemovalTriggersDeadlock_Test8TestBodyEvE3$_0EcvNS_8functionIFvT_EEEIRKNS2_6FutureI7NothingEEEEvEUlSJ_E_NS_9allocatorISK_EEFvSJ_EE18destroy_deallocateEv
          @        0x106f075fa std::__1::function<>::~function()
          @        0x106f06395 std::__1::function<>::~function()
          @        0x106f8b16a process::Future<>::Data::clearAllCallbacks()
          @        0x106f8ae80 process::Future<>::_set<>()
          @        0x106f8818d process::Future<>::set()
          @        0x106f8f41a _ZZNK7process6FutureI7NothingE7onReadyINSt3__16__bindIRMS2_FbRKS1_EJRS2_RNS4_12placeholders4__phILi1EEEEEEbEERKS2_OT_NS2_6PreferEENUlS7_E_clES7_
          @        0x106f8f1fd _ZNSt3__128__invoke_void_return_wrapperIvE6__callIJRZNK7process6FutureI7NothingE7onReadyINS_6__bindIRMS6_FbRKS5_EJRS6_RNS_12placeholders4__phILi1EEEEEEbEERKS6_OT_NS6_6PreferEEUlSA_E_SA_EEEvDpOT_
          @        0x106f8ee09 _ZNSt3__110__function6__funcIZNK7process6FutureI7NothingE7onReadyINS_6__bindIRMS5_FbRKS4_EJRS5_RNS_12placeholders4__phILi1EEEEEEbEERKS5_OT_NS5_6PreferEEUlS9_E_NS_9allocatorISO_EEFvS9_EEclES9_
      make[3]: *** [check-local] Segmentation fault: 11
      make[2]: *** [check-am] Error 2
      make[1]: *** [check] Error 2
      make: *** [check-recursive] Error 1
      

      Note that I've introduced a segfault in "process.cpp" to get the stack trace:

      bool wait(const UPID& pid, const Duration& duration)
      {
        process::initialize();
      
        if (!pid) {
          return false;
        }
      
        // This could result in a deadlock if some code decides to wait on a
        // process that has invoked that code!
        if (__process__ != nullptr && __process__->self() == pid) {
          std::cerr << "\n**** DEADLOCK DETECTED! ****\nYou are waiting on process "
                    << pid << " that it is currently executing." << std::endl;
      
          ((process::Promise<Nothing>*)0xDEAD)->future();
        }
      
        ...
      }
      

      It would be great if we can help users prevent from getting into this deadlock by either rejecting or circumventing this pattern altogether.

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                Unassigned
                Reporter:
                alexr Alex R
              • Votes:
                0 Vote for this issue
                Watchers:
                5 Start watching this issue

                Dates

                • Created:
                  Updated: