From 46302a0fc1a5c7fc2e6c2718d4573aae4dea943a Mon Sep 17 00:00:00 2001
From: Florian Schmaus <flow@cs.fau.de>
Date: Sun, 27 Feb 2022 17:55:57 +0100
Subject: [PATCH] Add worker sleep stats, rework stats machinery

The idea of the new stats machinery is that 'stats' becomes an option
that enables the basic stats gathering infrastructure in EMPER. At
some point, it should become a non-user option, i.e., it should be
remove from meson_options.txt. Then comes a layer of fine-grained
stats control switches, which default to 'auto'. Third, a new option
called 'stats_all' is added, which, if enabled, activates all
fine-graind stats knobs that are set to 'auto'.
---
 emper/Dispatcher.cpp   | 37 ++++++++++++++++++++++++++++++++++++-
 emper/Dispatcher.hpp   | 11 +++++++++++
 emper/Emper.hpp        |  8 ++++++++
 emper/Runtime.cpp      | 12 ++++++++++++
 emper/Runtime.hpp      |  4 ++++
 emper/lib/env.hpp      |  2 +-
 emper/stats/Worker.cpp | 35 +++++++++++++++++++++++++++++++++++
 emper/stats/Worker.hpp | 14 ++++++++++++++
 meson.build            | 30 ++++++++++++++++++++++++++++--
 meson_options.txt      | 22 ++++++++++++++++++++--
 10 files changed, 169 insertions(+), 6 deletions(-)

diff --git a/emper/Dispatcher.cpp b/emper/Dispatcher.cpp
index af53cebe..44b156a0 100644
--- a/emper/Dispatcher.cpp
+++ b/emper/Dispatcher.cpp
@@ -1,11 +1,21 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
-// Copyright © 2020 Florian Schmaus
+// Copyright © 2020-2022 Florian Schmaus
 #include "Dispatcher.hpp"
 
 #include <sched.h>
 
+#include <atomic>
+#include <chrono>
+
 #include "Emper.hpp"
 #include "Runtime.hpp"	// for Runtime
+#include "WakeupStrategy.hpp"
+#include "sleep_strategy/SemaphoreWorkerSleepStrategy.hpp"
+#include "stats/Worker.hpp"
+
+// Note that we initialize this with true to avoid counting workers
+// that initially go to sleep as being woken up in vain.
+thread_local bool Dispatcher::dispatchedWork = true;
 
 auto Dispatcher::getDispatchLoop() -> func_t {
 	return [this] { dispatchLoop(); };
@@ -26,14 +36,39 @@ void Dispatcher::dispatchLoopSleep() {
 	auto& workerSleepStrategy = runtime.workerSleepStrategy;
 	auto& terminateWorkers = runtime.terminateWorkers;
 
+	using clock = std::chrono::system_clock;
+	std::chrono::time_point<clock> sleepStart;
+	bool workerSlept = false;
+
 	bool canWake;
 	do {
 		// Notify the wakeup strategy about our sleep attempt
+		// TODO: We could take stats how often the wakeupStrategy decided
+		// that the worker should not sleep, and how often this was the
+		// right decission (based on dispatchedWork and a new thread_local sleepDenied).
 		if (!wakeupStrategy.canSleep()) break;
 
+		if constexpr (emper::STATS_WORKER_SLEEP) {
+			if (!workerSlept) {
+				if (!dispatchedWork) emper::stats::worker->wakeupInVainCount++;
+				workerSlept = true;
+				sleepStart = clock::now();
+			}
+		}
+
 		workerSleepStrategy.sleep();
 
 		// We always wakeup if the runtime is terminating
 		canWake = wakeupStrategy.canWakeup() || terminateWorkers.load(std::memory_order_relaxed);
 	} while (!canWake);
+
+	if constexpr (emper::STATS_WORKER_SLEEP) {
+		if (workerSlept) {
+			auto sleepStop = clock::now();
+			emper::stats::worker->recordSleep(sleepStart, sleepStop);
+			// TODO: Not sure if we should set dispatchedWork to false regardless of the value of
+			// firstSleep
+			dispatchedWork = false;
+		}
+	}
 }
diff --git a/emper/Dispatcher.hpp b/emper/Dispatcher.hpp
index 363c7732..bccd2eaa 100644
--- a/emper/Dispatcher.hpp
+++ b/emper/Dispatcher.hpp
@@ -6,6 +6,7 @@
 #include "Common.hpp"
 #include "Context.hpp"
 #include "Debug.hpp"
+#include "Emper.hpp"
 #include "Fiber.hpp"
 #include "emper-common.h"
 
@@ -16,6 +17,11 @@ class Dispatcher : public Logger<LogSubsystem::DISP> {
  private:
 	void dispatchLoopSleep();
 
+	/**
+	 * Marker bool if the worker dispatched work, used for "worker sleep" stats.
+	 */
+	static thread_local bool dispatchedWork;
+
  protected:
 	Runtime& runtime;
 
@@ -30,6 +36,11 @@ class Dispatcher : public Logger<LogSubsystem::DISP> {
 	// NOLINTNEXTLINE(readability-convert-member-functions-to-static)
 	inline void dispatch(AbstractFiber* fiber) {
 		LOGD("executing fiber " << fiber);
+
+		if constexpr (emper::STATS_WORKER_SLEEP) {
+			dispatchedWork = true;
+		}
+
 		Context::setCurrentFiber(fiber);
 		fiber->run();
 	}
diff --git a/emper/Emper.hpp b/emper/Emper.hpp
index bc87b2ea..02df6edf 100644
--- a/emper/Emper.hpp
+++ b/emper/Emper.hpp
@@ -317,4 +317,12 @@ const bool STACK_GUARD_PAGE =
 #endif
 		;
 
+constexpr bool STATS_WORKER_SLEEP =
+#ifdef EMPER_STATS_WORKER_SLEEP
+		true
+#else
+		false
+#endif
+		;
+
 }	 // namespace emper
diff --git a/emper/Runtime.cpp b/emper/Runtime.cpp
index a897ff11..144cd013 100644
--- a/emper/Runtime.cpp
+++ b/emper/Runtime.cpp
@@ -17,6 +17,7 @@
 #include <fstream>	// IWYU pragma: keep
 #include <iostream>
 #include <memory>	 // for __shared_ptr_access, shared_ptr
+#include <ratio>
 #include <string>	 // for string
 #include <thread>
 
@@ -498,6 +499,17 @@ void Runtime::printStats(std::ostream& out, bool detailed) {
 	}
 
 	globalWorkerStats.print(out);
+
+	auto now = std::chrono::system_clock::now();
+	std::chrono::duration<uint64_t, std::nano> lifetime = now - constructionTime;
+	uint64_t totalAvailableCpuTime = lifetime.count() * workerCount;
+
+	if constexpr (emper::STATS_WORKER_SLEEP) {
+		double sleepPercentage = static_cast<double>(globalWorkerStats.totalSleepNanos) /
+														 static_cast<double>(totalAvailableCpuTime);
+		out << "sleep-percentage: " << sleepPercentage << std::endl;
+	}
+
 	if (detailed) {
 		for (auto& stats : workerStatsSnapshot) {
 			stats.print();
diff --git a/emper/Runtime.hpp b/emper/Runtime.hpp
index 4fefbd51..9546b932 100644
--- a/emper/Runtime.hpp
+++ b/emper/Runtime.hpp
@@ -6,6 +6,7 @@
 #include <signal.h>
 
 #include <atomic>
+#include <chrono>
 #include <cstdint>		 // for intptr_t
 #include <functional>	 // for function
 #include <iostream>
@@ -70,6 +71,9 @@ class Runtime : public Logger<LogSubsystem::RUNTI> {
 
 	using StrategyFactory = std::function<RuntimeStrategy*(Runtime&)>;
 
+	const std::chrono::time_point<std::chrono::system_clock> constructionTime =
+			std::chrono::system_clock::now();
+
  private:
 	static std::mutex currentRuntimeMutex;
 	static Runtime* currentRuntime;
diff --git a/emper/lib/env.hpp b/emper/lib/env.hpp
index 1179253d..e5764e92 100644
--- a/emper/lib/env.hpp
+++ b/emper/lib/env.hpp
@@ -4,7 +4,7 @@
 
 #include <cinttypes>
 #include <cstdlib>
-#include <iterator>
+#include <limits>
 #include <memory>
 #include <optional>
 #include <ostream>
diff --git a/emper/stats/Worker.cpp b/emper/stats/Worker.cpp
index f60e8843..a984564c 100644
--- a/emper/stats/Worker.cpp
+++ b/emper/stats/Worker.cpp
@@ -3,6 +3,7 @@
 #include "Worker.hpp"
 
 #include <algorithm>
+#include <ratio>
 #include <string>
 
 #include "Emper.hpp"
@@ -51,6 +52,19 @@ void Worker::recordStackUsage(size_t stackUsage) {
 	lastStackUsage.push_back(stackUsage);
 }
 
+void Worker::recordSleep(std::chrono::time_point<std::chrono::system_clock>& sleepStart,
+												 std::chrono::time_point<std::chrono::system_clock>& sleepStop) {
+	std::chrono::duration<uint64_t, std::nano> sleepNanosDuration = sleepStop - sleepStart;
+	uint64_t sleepNanos = sleepNanosDuration.count();
+
+	totalSleepNanos += sleepNanos;
+	maxSleepNanos = std::max(maxSleepNanos, sleepNanos);
+	minSleepNanos = std::min(minSleepNanos, sleepNanos);
+	avgSleepNanos.update(sleepNanos);
+
+	sleepCount++;
+}
+
 auto Worker::operator+=(const Worker& other) -> Worker& {
 	maxBlockedContexts = std::max(maxBlockedContexts, other.maxBlockedContexts);
 	blockedGeneric += other.blockedGeneric;
@@ -72,6 +86,15 @@ auto Worker::operator+=(const Worker& other) -> Worker& {
 		lastStackUsage.clear();
 	}
 
+	if constexpr (emper::STATS_WORKER_SLEEP) {
+		sleepCount += other.sleepCount;
+		totalSleepNanos += other.totalSleepNanos;
+		maxSleepNanos = std::max(maxSleepNanos, other.maxSleepNanos);
+		minSleepNanos = std::min(minSleepNanos, other.minSleepNanos);
+		avgSleepNanos.update(static_cast<uint64_t>(other.avgSleepNanos.getAverage()));
+		wakeupInVainCount += other.wakeupInVainCount;
+	}
+
 	return *this;
 }
 
@@ -102,6 +125,18 @@ void Worker::print(std::ostream& out) {
 			out << std::endl;
 		}
 	}
+
+	if constexpr (emper::STATS_WORKER_SLEEP) {
+		// clang-format off
+		out << "sleep-count: " << sleepCount << std::endl
+				<< "wakeup-in-vain-count: " << wakeupInVainCount << std::endl
+				<< "total-sleep-ns: " << totalSleepNanos << std::endl
+				<< "max-sleep-ns: " << maxSleepNanos << std::endl
+				<< "min-sleep-ns: " << minSleepNanos << std::endl
+				<< "avg-sleep-ns: " << avgSleepNanos.getAverage() << std::endl
+			;
+		// clang-format on
+	}
 }
 
 }	 // namespace emper::stats
diff --git a/emper/stats/Worker.hpp b/emper/stats/Worker.hpp
index d7e1092a..c70dd52a 100644
--- a/emper/stats/Worker.hpp
+++ b/emper/stats/Worker.hpp
@@ -3,6 +3,7 @@
 #pragma once
 
 #include <boost/circular_buffer.hpp>
+#include <chrono>
 #include <cstddef>
 #include <cstdint>
 #include <iostream>
@@ -16,6 +17,7 @@ enum class LogSubsystem;
 template <LogSubsystem logSubsystem>
 class Blockable;	// IWYU pragma: keep
 class Context;
+class Dispatcher;
 class Runtime;
 
 namespace emper::stats {
@@ -43,6 +45,17 @@ class Worker {
 
 	void recordStackUsage(size_t stackUsage);
 
+	uint64_t sleepCount = 0;
+	uint64_t totalSleepNanos = 0;
+	uint64_t maxSleepNanos = 0;
+	uint64_t minSleepNanos = UINT64_MAX;
+	emper::lib::math::RunningAverage<double, uint64_t> avgSleepNanos;
+
+	void recordSleep(std::chrono::time_point<std::chrono::system_clock>& sleepStart,
+									 std::chrono::time_point<std::chrono::system_clock>& sleepStop);
+
+	size_t wakeupInVainCount = 0;
+
 	auto operator+=(const Worker& other) -> Worker&;
 
  public:
@@ -51,6 +64,7 @@ class Worker {
 	template <LogSubsystem>
 	friend class ::Blockable;
 	friend class ::Context;
+	friend class ::Dispatcher;
 	friend class ::Runtime;
 };
 
diff --git a/meson.build b/meson.build
index 6ef7c078..9499fc88 100644
--- a/meson.build
+++ b/meson.build
@@ -58,12 +58,37 @@ ws_queue_scheduler = get_option('ws_queue_scheduler')
 assume_page_size = get_option('assume_page_size')
 assume_cache_line_size = get_option('assume_cache_line_size')
 
+stats = get_option('stats')
+
+stats_all = get_option('stats_all')
+stats_worker_sleep_option = get_option('stats_worker_sleep')
+if stats_worker_sleep_option == 'enabled' or (stats_all and stats_worker_sleep_option == 'auto')
+  stats_worker_sleep = true
+  stats = true
+else
+  stats_worker_sleep = false
+endif
+stats_stack_usage_option = get_option('stats_stack_usage')
+if stats_stack_usage_option == 'auto'
+  if stats_all
+    stats_stack_usage = 'efficient'
+  else
+    stats_stack_usage = 'disabled'
+  endif
+else
+  stats_stack_usage = stats_stack_usage_option
+endif
+if stats_stack_usage != 'disabled'
+  stats = true
+endif
+
+
 conf_data.set('EMPER_WORKER_SLEEP', get_option('worker_sleep'))
 conf_data.set('EMPER_WORKER_WAKEUP_STRATEGY', get_option('worker_wakeup_strategy'))
 conf_data.set('EMPER_WORKER_IGNORE_WAKEUP_HINT', get_option('worker_ignore_wakeup_hint'))
 conf_data.set('EMPER_LOCKED_MPSC_QUEUE', get_option('locked_mpsc_queue'))
 conf_data.set('EMPER_OVERFLOW_QUEUE', get_option('overflow_queue'))
-conf_data.set('EMPER_STATS', get_option('stats'))
+conf_data.set('EMPER_STATS', stats)
 conf_data.set('EMPER_OVERFLOW_QUEUE', get_option('overflow_queue'))
 conf_data.set('EMPER_BLOCKED_CONTEXT_SET', get_option('blocked_context_set'))
 conf_data.set('EMPER_SET_AFFINITY_ON_BLOCK', get_option('set_affinity_on_block'))
@@ -77,7 +102,8 @@ conf_data.set('EMPER_ASSUME_PAGE_SIZE', assume_page_size)
 conf_data.set('EMPER_MIN_CONTEXT_STACK_SIZE', get_option('min_context_stack_size'))
 conf_data.set('EMPER_ASSUME_CACHE_LINE_SIZE', assume_cache_line_size)
 conf_data.set('EMPER_STACK_GUARD_PAGE', get_option('stack_guard_page'))
-conf_data.set('EMPER_STATS_STACK_USAGE', get_option('stats_stack_usage'))
+conf_data.set('EMPER_STATS_STACK_USAGE', stats_stack_usage)
+conf_data.set('EMPER_STATS_WORKER_SLEEP', stats_worker_sleep)
 
 context_alignment = get_option('context_alignment')
 if context_alignment == 'none'
diff --git a/meson_options.txt b/meson_options.txt
index 23bd0bfe..71da8976 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -127,11 +127,29 @@ option(
   type: 'combo',
   choices: [
     'disabled',
+    'auto',
     'efficient',
     'accurate',
   ],
-  value: 'disabled',
-  description: 'Collect stack-usage stats via stack watermarking'
+  value: 'auto',
+  description: 'Collect stack-usage stats via stack watermarking',
+)
+option(
+  'stats_worker_sleep',
+  type: 'combo',
+  choices: [
+    'disabled',
+    'auto',
+    'enabled',
+  ],
+  value: 'auto',
+  description: 'Collect stats about worker sleep',
+)
+option(
+  'stats_all',
+  type: 'boolean',
+  value: false,
+  description: 'Enable the collection of all EMPER runtime-system stats',
 )
 option(
   'default_scheduling_strategy',
-- 
GitLab