From 1e647338589b04a921f25cd27a74a6cc87e16b81 Mon Sep 17 00:00:00 2001 From: Florian Fischer <florian.fischer@muhq.space> Date: Fri, 24 Sep 2021 17:06:54 +0200 Subject: [PATCH] [IoContext] document and fix possible scenario resulting in lost wakeup This is fixed by using a normal lock instead of the try lock in the OWNER case. --- emper/io/IoContext.cpp | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/emper/io/IoContext.cpp b/emper/io/IoContext.cpp index a3319071..5dbeb52d 100644 --- a/emper/io/IoContext.cpp +++ b/emper/io/IoContext.cpp @@ -322,14 +322,36 @@ reap_cqes: // Number of actual continuation fibers resulting from the reaped CQEs unsigned continuationsCount = 0; + // TODO: Is using a try lock and the waitInflight flag here even sound? + // Coudn't it be possible to have a lost wakeup with unconsumed new work notification + // cqe in our CQ + // + // State only a single worker does work involving IO and another (completer, io-stealing + // worker accesses its CQ. + + // Other Owner + // | submit IO + // | lock + // | prepare to sleep + // | set flag + // | unlock + // | sleep + // lock | + // | try lock unsucessfull + // | sleep again + // check flag | + // unlock | if constexpr (needsCqLock) { - // Someone else is currently reaping completions - if (unlikely(!cq_lock.try_lock())) { - LOGD("unsuccessful try_lock from " << callerEnvironment); - return 0; - } - - if constexpr (callerEnvironment != CallerEnvironment::OWNER) { + // The Owner always takes the lock to reap all completions and especially + // new work notifications and prevent the above discribed problem. + if constexpr (callerEnvironment == CallerEnvironment::OWNER) { + cq_lock.lock(); + } else { + // Someone else is currently reaping completions + if (unlikely(!cq_lock.try_lock())) { + LOGD("unsuccessful try_lock from " << callerEnvironment); + return 0; + } // We have to check the waitInflight flag with the cq_lock held to // ensure we observe an update by the worker holding the lock. // Otherwise this could happen: -- GitLab