From 67b0c77ac22d9c18048c629ec0ef80ad02c61e39 Mon Sep 17 00:00:00 2001
From: Florian Fischer <florian.fischer@muhq.space>
Date: Sat, 9 Oct 2021 16:34:44 +0200
Subject: [PATCH] [meson] introduce lockless memory order and rename lockless
 option

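The lockless CQ algorithm can now be enabled by setting -Dio_lockless_cq=true,
and the memory ordering it uses can be selected by setting
-Dio_lockless_memory_order={weak,strong}.

The new io_lockless_cq option replaces io_locked_cq with inverted meaning:
its default (false) still selects the lock-based CQ synchronization.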

io_lockless_memory_order=weak:
    read with acquire (atomic loads and the failure order of the CAS)
    write with release (the success order of the CAS)

io_lockless_memory_order=strong:
    read with seq_cst (atomic loads and the failure order of the CAS)
    write with seq_cst (the success order of the CAS)
---
 .gitlab-ci.yml         |  2 +-
 emper/Emper.hpp        | 12 ++++++++++--
 emper/io/IoContext.cpp | 20 +++++++++++++++-----
 emper/io/IoContext.hpp |  2 +-
 meson.build            |  3 ++-
 meson_options.txt      | 13 ++++++++++---
 6 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b7ef3eec..7447f5a6 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -119,7 +119,7 @@ clang-tidy:
 
 .emper-lockless-cq:
   variables:
-    EMPER_IO_LOCKED_CQ: 'false'
+    EMPER_IO_LOCKLESS_CQ: 'true'
 
 .default-library-static:
   variables:
diff --git a/emper/Emper.hpp b/emper/Emper.hpp
index fba9961d..34b0459e 100644
--- a/emper/Emper.hpp
+++ b/emper/Emper.hpp
@@ -113,14 +113,22 @@ static const bool IO_STEALING =
 #endif
 		;
 
-static const bool IO_LOCKED_CQ =
-#ifdef EMPER_IO_LOCKED_CQ
+static const bool IO_LOCKLESS_CQ =
+#ifdef EMPER_IO_LOCKLESS_CQ
 		true
 #else
 		false
 #endif
 		;
 
+enum class IoLocklessMemoryOrder {
+	weak,
+	strong,
+};
+
+static const enum IoLocklessMemoryOrder IO_LOCKLESS_MEMORY_ORDER =
+		IoLocklessMemoryOrder::EMPER_IO_LOCKLESS_MEMORY_ORDER;
+
 static const bool IO_SINGLE_URING =
 #ifdef EMPER_IO_SINGLE_URING
 		true
diff --git a/emper/io/IoContext.cpp b/emper/io/IoContext.cpp
index a11c88c2..eeaa87ef 100644
--- a/emper/io/IoContext.cpp
+++ b/emper/io/IoContext.cpp
@@ -219,6 +219,16 @@ template void IoContext::submitAndWait<CallerEnvironment::ANYWHERE>(Future &futu
 
 template <CallerEnvironment callerEnvironment>
 auto IoContext::reapCompletionsLockless(Fiber **continuations, unsigned toReap) -> unsigned {
+	// Configurable memory order for the atomic operations
+	constexpr auto LL_READ_MEM_ORDER =
+			emper::IO_LOCKLESS_MEMORY_ORDER == emper::IoLocklessMemoryOrder::weak
+					? std::memory_order_acquire
+					: std::memory_order_seq_cst;
+	constexpr auto LL_WRITE_MEM_ORDER =
+			emper::IO_LOCKLESS_MEMORY_ORDER == emper::IoLocklessMemoryOrder::weak
+					? std::memory_order_release
+					: std::memory_order_seq_cst;
+
 	std::array<Completion, CQE_BATCH_COUNT> reapedCompletions;
 
 	if constexpr (callerEnvironment == CallerEnvironment::EMPER) {
@@ -234,11 +244,11 @@ auto IoContext::reapCompletionsLockless(Fiber **continuations, unsigned toReap)
 	//       if the unsigned head counter overflows during the read and the CAS.
 
 	// Load possibly concurrently used userspace written head pointer
-	unsigned head = ahead->load(std::memory_order_acquire);
+	unsigned head = ahead->load(LL_READ_MEM_ORDER);
 	unsigned count;
 	do {
 		// Load concurrently used kernel written tail pointer
-		unsigned tail = atail->load(std::memory_order_acquire);
+		unsigned tail = atail->load(LL_READ_MEM_ORDER);
 
 		// NOTE: This number may already be wrong during its calculation
 		unsigned ready = tail - head;
@@ -271,9 +281,9 @@ auto IoContext::reapCompletionsLockless(Fiber **continuations, unsigned toReap)
 			reapedCompletion.second = cqe_data;
 		}
 
-		// try to consume those cqes we stored in reapedCompletions
-	} while (!ahead->compare_exchange_weak(head, head + count, std::memory_order_release,
-																				 std::memory_order_acquire));
+		// TODO: think about the correct memory ordering constraints
+	} while (
+			!ahead->compare_exchange_weak(head, head + count, LL_WRITE_MEM_ORDER, LL_READ_MEM_ORDER));
 
 	LOGD("got " << count << " cqes from worker " << worker->getWorkerId() << "'s io_uring");
 
diff --git a/emper/io/IoContext.hpp b/emper/io/IoContext.hpp
index 8aa792ee..60b1e99a 100644
--- a/emper/io/IoContext.hpp
+++ b/emper/io/IoContext.hpp
@@ -77,7 +77,7 @@ class IoContext : public Logger<LogSubsystem::IO> {
 			(emper::IO_COMPLETER_BEHAVIOR != emper::IoCompleterBehavior::none) || (emper::IO_STEALING);
 
 	// Are we synchronizing the CQs lockfree
-	static constexpr bool locklessCq = needsCqLock && !emper::IO_LOCKED_CQ;
+	static constexpr bool locklessCq = needsCqLock && emper::IO_LOCKLESS_CQ;
 
 	// TryLock protecting the completion queue of ring.
 	CACHE_LINE_EXCLUSIVE(std::mutex, cq_lock);
diff --git a/meson.build b/meson.build
index c8f98994..aeaf05dc 100644
--- a/meson.build
+++ b/meson.build
@@ -90,7 +90,7 @@ endif
 
 io_bool_options = [
 	'stealing',
-	'locked_cq',
+	'lockless_cq',
 	'single_uring',
 	'try_syscall',
 	'uring_sqpoll',
@@ -99,6 +99,7 @@ io_bool_options = [
 
 io_raw_options = [
 	'worker_uring_entries',
+	'lockless_memory_order',
 ]
 
 foreach option : io_bool_options
diff --git a/meson_options.txt b/meson_options.txt
index 1c6ced87..2a6e5627 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -168,8 +168,15 @@ option(
   value: false,
 )
 option(
-  'io_locked_cq',
+  'io_lockless_cq',
   type: 'boolean',
-  description: 'Synchronize the CQs with a try lock',
-  value: true,
+  description: 'Synchronize the concurrent access to CQs with a lockless algorithm',
+  value: false,
+)
+option(
+  'io_lockless_memory_order',
+  type: 'combo',
+  choices: ['weak', 'strong'],
+  description: 'Memory ordering used for the lockless CQ algorithm',
+  value: 'weak',
 )
-- 
GitLab