diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b7ef3eec7125025643863e24fc052f5c82c27b6b..7447f5a647b19f60a0d1a03a307f1efa89e1ad8e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -119,7 +119,7 @@ clang-tidy: .emper-lockless-cq: variables: - EMPER_IO_LOCKED_CQ: 'false' + EMPER_IO_LOCKLESS_CQ: 'true' .default-library-static: variables: diff --git a/emper/Emper.hpp b/emper/Emper.hpp index fba9961d40169b28eeb62774168f406707106c0d..34b0459e41d0d139069a4141beeb7ddcf427bca6 100644 --- a/emper/Emper.hpp +++ b/emper/Emper.hpp @@ -113,14 +113,22 @@ static const bool IO_STEALING = #endif ; -static const bool IO_LOCKED_CQ = -#ifdef EMPER_IO_LOCKED_CQ +static const bool IO_LOCKLESS_CQ = +#ifdef EMPER_IO_LOCKLESS_CQ true #else false #endif ; +enum class IoLocklessMemoryOrder { + weak, + strong, +}; + +static const enum IoLocklessMemoryOrder IO_LOCKLESS_MEMORY_ORDER = + IoLocklessMemoryOrder::EMPER_IO_LOCKLESS_MEMORY_ORDER; + static const bool IO_SINGLE_URING = #ifdef EMPER_IO_SINGLE_URING true diff --git a/emper/io/IoContext.cpp b/emper/io/IoContext.cpp index a11c88c2a6d1194d7e9ddfa62c4d4e2bf6755d93..eeaa87eff08c599a528066a954c0261c74961b31 100644 --- a/emper/io/IoContext.cpp +++ b/emper/io/IoContext.cpp @@ -219,6 +219,16 @@ template void IoContext::submitAndWait<CallerEnvironment::ANYWHERE>(Future &futu template <CallerEnvironment callerEnvironment> auto IoContext::reapCompletionsLockless(Fiber **continuations, unsigned toReap) -> unsigned { + // Configurable memory order for the atomic operations + constexpr auto LL_READ_MEM_ORDER = + emper::IO_LOCKLESS_MEMORY_ORDER == emper::IoLocklessMemoryOrder::weak + ? std::memory_order_acquire + : std::memory_order_seq_cst; + constexpr auto LL_WRITE_MEM_ORDER = + emper::IO_LOCKLESS_MEMORY_ORDER == emper::IoLocklessMemoryOrder::weak + ? std::memory_order_release + : std::memory_order_seq_cst; + std::array<Completion, CQE_BATCH_COUNT> reapedCompletions; if constexpr (callerEnvironment == CallerEnvironment::EMPER) { @@ -234,11 +244,11 @@ auto IoContext::reapCompletionsLockless(Fiber **continuations, unsigned toReap) // if the unsigned head counter overflows during the read and the CAS. // Load possibly concurrently used userspace written head pointer - unsigned head = ahead->load(std::memory_order_acquire); + unsigned head = ahead->load(LL_READ_MEM_ORDER); unsigned count; do { // Load concurrently used kernel written tail pointer - unsigned tail = atail->load(std::memory_order_acquire); + unsigned tail = atail->load(LL_READ_MEM_ORDER); // NOTE: This number may already be wrong during its calculation unsigned ready = tail - head; @@ -271,9 +281,9 @@ auto IoContext::reapCompletionsLockless(Fiber **continuations, unsigned toReap) reapedCompletion.second = cqe_data; } - // try to consume those cqes we stored in reapedCompletions - } while (!ahead->compare_exchange_weak(head, head + count, std::memory_order_release, - std::memory_order_acquire)); + // TODO: think about the correct memory ordering constraints + } while ( + !ahead->compare_exchange_weak(head, head + count, LL_WRITE_MEM_ORDER, LL_READ_MEM_ORDER)); LOGD("got " << count << " cqes from worker " << worker->getWorkerId() << "'s io_uring"); diff --git a/emper/io/IoContext.hpp b/emper/io/IoContext.hpp index 8aa792ee77d0814ef23fb8b5a09e454ada0502e7..60b1e99a8a55194a08ab236135119ccda891aa93 100644 --- a/emper/io/IoContext.hpp +++ b/emper/io/IoContext.hpp @@ -77,7 +77,7 @@ class IoContext : public Logger<LogSubsystem::IO> { (emper::IO_COMPLETER_BEHAVIOR != emper::IoCompleterBehavior::none) || (emper::IO_STEALING); // Are we synchronizing the CQs lockfree - static constexpr bool locklessCq = needsCqLock && !emper::IO_LOCKED_CQ; + static constexpr bool locklessCq = needsCqLock && emper::IO_LOCKLESS_CQ; // TryLock protecting the completion queue of ring. CACHE_LINE_EXCLUSIVE(std::mutex, cq_lock); diff --git a/meson.build b/meson.build index c8f9899436c8eabef3bf621ecd75774280497961..aeaf05dc19077c2eaa5c0b53b41ba4213a091b14 100644 --- a/meson.build +++ b/meson.build @@ -90,7 +90,7 @@ endif io_bool_options = [ 'stealing', - 'locked_cq', + 'lockless_cq', 'single_uring', 'try_syscall', 'uring_sqpoll', @@ -99,6 +99,7 @@ io_bool_options = [ io_raw_options = [ 'worker_uring_entries', + 'lockless_memory_order', ] foreach option : io_bool_options diff --git a/meson_options.txt b/meson_options.txt index 1c6ced87499ffe808e9c7020dd857412658c8421..2a6e56270107e580b83c8b1cb551ee69f23a12e6 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -168,8 +168,15 @@ option( value: false, ) option( - 'io_locked_cq', + 'io_lockless_cq', type: 'boolean', - description: 'Synchronize the CQs with a try lock', - value: true, + description: 'Synchronize the concurrent access to CQs with a lockless algorithm', + value: false, +) +option( + 'io_lockless_memory_order', + type: 'combo', + choices: ['weak', 'strong'], + description: 'Memory ordering used for the lockless CQ algorithm', + value: 'weak', )