diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3591f70cf746c4b9d0928b664419650cad9046ee..c61680959a8e3dd06a2de31b8379e412bf8b1379 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -117,7 +117,7 @@ clang-tidy: .emper-stats: variables: - EMPER_STATS: 'true' + EMPER_STATS_ALL: 'true' .emper-userspace-rcu: variables: diff --git a/emper/Blockable.hpp b/emper/Blockable.hpp index fa7019264be11def44de57ab3ed6087771c308ef..e5ba18c2664c8494a9a136b53b2028fa0f23a2d3 100644 --- a/emper/Blockable.hpp +++ b/emper/Blockable.hpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: LGPL-3.0-or-later -// Copyright © 2020-2021 Florian Schmaus +// Copyright © 2020-2022 Florian Schmaus #pragma once #include <utility> @@ -42,7 +42,7 @@ class Blockable : public Logger<logSubsystem> { blockablePurpose(blockablePurpose) {} void maybeSetAffinity() { - if constexpr (!emper::SET_AFFINITY_ON_BLOCK && !emper::STATS) return; + if constexpr (!emper::SET_AFFINITY_ON_BLOCK && !emper::STATS_BLOCKED_CONTEXT) return; // TODO: At some point we may want to have something like // Runtime::isAffinityCapable() and return here if it is @@ -58,7 +58,7 @@ class Blockable : public Logger<logSubsystem> { this->affinity = &affinity_buffer; } - if (emper::STATS || setAffinityBuffer) { + if (emper::STATS_BLOCKED_CONTEXT || setAffinityBuffer) { affinity_buffer = Runtime::getWorkerId(); } } @@ -75,23 +75,19 @@ class Blockable : public Logger<logSubsystem> { maybeSetAffinity(); - decltype(blockedContexts)::set_type::size_type blockedContextCount = 0; + if constexpr (emper::STATS_BLOCKED_CONTEXT_COUNT) { + size_t blockedContextCount = + runtime.blockedContextCount.fetch_add(1, std::memory_order_relaxed) + 1; + emper::stats::worker->recordBlockedContextCount(blockedContextCount); + } + if constexpr (emper::BLOCKED_CONTEXT_SET) { auto* currentContext = Context::getCurrentContext(); - - if constexpr (emper::STATS) { - blockedContextCount = blockedContexts.insertAndGetSize(currentContext); - } else { - blockedContexts.insert(currentContext); - } + blockedContexts.insert(currentContext); } - if constexpr (emper::STATS) { - // Note that blockedContextCount may not been set here if - // emper::BLOCKED_CONTEXT_SET is not enabled. But we will at - // least record a blocked context here if emper::STATS is - // enabled. - emper::stats::worker->recordBlockedContext(blockedContextCount, blockablePurpose); + if constexpr (emper::STATS_BLOCKED_CONTEXT) { + emper::stats::worker->recordBlockedContext(blockablePurpose); } contextManager.saveAndStartNew(std::move(freshContextHook)); @@ -99,11 +95,14 @@ class Blockable : public Logger<logSubsystem> { auto unblockAndGetContinuation(Context* context) -> Fiber* { assert(context != nullptr); + if constexpr (emper::STATS_BLOCKED_CONTEXT_COUNT) { + runtime.blockedContextCount.fetch_sub(1, std::memory_order_relaxed); + } if constexpr (emper::BLOCKED_CONTEXT_SET) { blockedContexts.erase(context); } - if constexpr (emper::STATS) { + if constexpr (emper::STATS_BLOCKED_CONTEXT) { if (likely(emper::stats::worker)) { emper::stats::worker->recordUnblockedContext(affinity_buffer, blockablePurpose); } else { diff --git a/emper/Dispatcher.cpp b/emper/Dispatcher.cpp index 2499cad08cfd38009c2ebdd69385e103f485bac9..44b156a051256f890bf375c8fd54aa592599e5bc 100644 --- a/emper/Dispatcher.cpp +++ b/emper/Dispatcher.cpp @@ -1,11 +1,21 @@ // SPDX-License-Identifier: LGPL-3.0-or-later -// Copyright © 2020 Florian Schmaus +// Copyright © 2020-2022 Florian Schmaus #include "Dispatcher.hpp" #include <sched.h> +#include <atomic> +#include <chrono> + #include "Emper.hpp" #include "Runtime.hpp" // for Runtime +#include "WakeupStrategy.hpp" +#include "sleep_strategy/SemaphoreWorkerSleepStrategy.hpp" +#include "stats/Worker.hpp" + +// Note that we initialize this with true to avoid counting workers +// that initially go to sleep as being woken up in vain. +thread_local bool Dispatcher::dispatchedWork = true; auto Dispatcher::getDispatchLoop() -> func_t { return [this] { dispatchLoop(); }; @@ -15,8 +25,50 @@ void Dispatcher::dispatchLoopDoSleep() { runtime.maybeTerminateWorker(); if constexpr (emper::WORKER_SLEEP) { - runtime.dispatchLoopSleep(); + dispatchLoopSleep(); } else { sched_yield(); } } + +void Dispatcher::dispatchLoopSleep() { + auto& wakeupStrategy = runtime.wakeupStrategy; + auto& workerSleepStrategy = runtime.workerSleepStrategy; + auto& terminateWorkers = runtime.terminateWorkers; + + using clock = std::chrono::system_clock; + std::chrono::time_point<clock> sleepStart; + bool workerSlept = false; + + bool canWake; + do { + // Notify the wakeup strategy about our sleep attempt + // TODO: We could take stats how often the wakeupStrategy decided + // that the worker should not sleep, and how often this was the + // right decission (based on dispatchedWork and a new thread_local sleepDenied). + if (!wakeupStrategy.canSleep()) break; + + if constexpr (emper::STATS_WORKER_SLEEP) { + if (!workerSlept) { + if (!dispatchedWork) emper::stats::worker->wakeupInVainCount++; + workerSlept = true; + sleepStart = clock::now(); + } + } + + workerSleepStrategy.sleep(); + + // We always wakeup if the runtime is terminating + canWake = wakeupStrategy.canWakeup() || terminateWorkers.load(std::memory_order_relaxed); + } while (!canWake); + + if constexpr (emper::STATS_WORKER_SLEEP) { + if (workerSlept) { + auto sleepStop = clock::now(); + emper::stats::worker->recordSleep(sleepStart, sleepStop); + // TODO: Not sure if we should set dispatchedWork to false regardless of the value of + // firstSleep + dispatchedWork = false; + } + } +} diff --git a/emper/Dispatcher.hpp b/emper/Dispatcher.hpp index 60e299aa6e8287add6d4d7ea12e34201619f5eba..bccd2eaa6fe037632f7c185003dd4bd5c2fc0940 100644 --- a/emper/Dispatcher.hpp +++ b/emper/Dispatcher.hpp @@ -6,6 +6,7 @@ #include "Common.hpp" #include "Context.hpp" #include "Debug.hpp" +#include "Emper.hpp" #include "Fiber.hpp" #include "emper-common.h" @@ -13,6 +14,14 @@ class Runtime; class ContextManager; class Dispatcher : public Logger<LogSubsystem::DISP> { + private: + void dispatchLoopSleep(); + + /** + * Marker bool if the worker dispatched work, used for "worker sleep" stats. + */ + static thread_local bool dispatchedWork; + protected: Runtime& runtime; @@ -27,6 +36,11 @@ class Dispatcher : public Logger<LogSubsystem::DISP> { // NOLINTNEXTLINE(readability-convert-member-functions-to-static) inline void dispatch(AbstractFiber* fiber) { LOGD("executing fiber " << fiber); + + if constexpr (emper::STATS_WORKER_SLEEP) { + dispatchedWork = true; + } + Context::setCurrentFiber(fiber); fiber->run(); } diff --git a/emper/Emper.hpp b/emper/Emper.hpp index bc87b2eaa9db06dbbd2d8ff24e5c917b7cdfc5e9..77c40ed1c3b0c9eeb40161f29f1abc5e0733de1c 100644 --- a/emper/Emper.hpp +++ b/emper/Emper.hpp @@ -35,13 +35,18 @@ const StatsStackUsage STATS_STACK_USAGE = StatsStackUsage::EMPER_STATS_STACK_USA const bool STATS_STACK_USAGE_ENABLED = STATS_STACK_USAGE != StatsStackUsage::disabled; +constexpr bool STATS_WORKER_SLEEP = EMPER_STATS_WORKER_SLEEP; +constexpr bool STATS_BLOCKED_CONTEXT = EMPER_STATS_BLOCKED_CONTEXT; +constexpr bool STATS_BLOCKED_CONTEXT_COUNT = EMPER_STATS_BLOCKED_CONTEXT_COUNT; + const bool STATS = #ifdef EMPER_STATS true #else false #endif - || STATS_STACK_USAGE_ENABLED; + || STATS_STACK_USAGE_ENABLED || STATS_WORKER_SLEEP || STATS_BLOCKED_CONTEXT || + STATS_BLOCKED_CONTEXT_COUNT; template <typename C> void statsIncr(C& counter) { diff --git a/emper/Runtime.cpp b/emper/Runtime.cpp index a897ff116a73c2c15dd36b64e146c26649cc9e23..144cd013252b31b2fd172efa6c8aff77688f3ff4 100644 --- a/emper/Runtime.cpp +++ b/emper/Runtime.cpp @@ -17,6 +17,7 @@ #include <fstream> // IWYU pragma: keep #include <iostream> #include <memory> // for __shared_ptr_access, shared_ptr +#include <ratio> #include <string> // for string #include <thread> @@ -498,6 +499,17 @@ void Runtime::printStats(std::ostream& out, bool detailed) { } globalWorkerStats.print(out); + + auto now = std::chrono::system_clock::now(); + std::chrono::duration<uint64_t, std::nano> lifetime = now - constructionTime; + uint64_t totalAvailableCpuTime = lifetime.count() * workerCount; + + if constexpr (emper::STATS_WORKER_SLEEP) { + double sleepPercentage = static_cast<double>(globalWorkerStats.totalSleepNanos) / + static_cast<double>(totalAvailableCpuTime); + out << "sleep-percentage: " << sleepPercentage << std::endl; + } + if (detailed) { for (auto& stats : workerStatsSnapshot) { stats.print(); diff --git a/emper/Runtime.hpp b/emper/Runtime.hpp index 40681e9da0abc6eca5f19c6688b4dfc30ab372f0..b8388f878b1d04137bcea6cb7749d1bdc419245e 100644 --- a/emper/Runtime.hpp +++ b/emper/Runtime.hpp @@ -6,6 +6,7 @@ #include <signal.h> #include <atomic> +#include <chrono> #include <cstdint> // for intptr_t #include <functional> // for function #include <iostream> @@ -70,6 +71,9 @@ class Runtime : public Logger<LogSubsystem::RUNTI> { using StrategyFactory = std::function<RuntimeStrategy*(Runtime&)>; + const std::chrono::time_point<std::chrono::system_clock> constructionTime = + std::chrono::system_clock::now(); + private: static std::mutex currentRuntimeMutex; static Runtime* currentRuntime; @@ -117,6 +121,9 @@ class Runtime : public Logger<LogSubsystem::RUNTI> { WorkerSleepStrategy workerSleepStrategy; emper::WakeupStrategy wakeupStrategy; + // Per runtime-system stats + ALIGN_TO_CACHE_LINE std::atomic<size_t> blockedContextCount{0}; + static StrategyFactory DEFAULT_STRATEGY_FACTORY; static void printLastRuntimeStats(std::ostream& out = std::cout); @@ -166,19 +173,6 @@ class Runtime : public Logger<LogSubsystem::RUNTI> { void maybeTerminateWorker(); - void dispatchLoopSleep() { - bool canWake; - do { - // Notify the wakeup strategy about our sleep attempt - if (!wakeupStrategy.canSleep()) break; - - workerSleepStrategy.sleep(); - - // We always wakeup if the runtime is terminating - canWake = wakeupStrategy.canWakeup() || terminateWorkers.load(std::memory_order_relaxed); - } while (!canWake); - } - auto nextFiber() -> std::optional<NextFiberResult>; public: diff --git a/emper/lib/env.hpp b/emper/lib/env.hpp index 1179253da971193d0bb7da885e23ea730526b802..e5764e92f0457256bbd3d3f01426b3cbf636da76 100644 --- a/emper/lib/env.hpp +++ b/emper/lib/env.hpp @@ -4,7 +4,7 @@ #include <cinttypes> #include <cstdlib> -#include <iterator> +#include <limits> #include <memory> #include <optional> #include <ostream> diff --git a/emper/stats/Worker.cpp b/emper/stats/Worker.cpp index f60e884300bd40704aece7c8c3e04fdded9ff48c..3b2cac4e7d358f122927483db3447cc5516bba8c 100644 --- a/emper/stats/Worker.cpp +++ b/emper/stats/Worker.cpp @@ -3,6 +3,7 @@ #include "Worker.hpp" #include <algorithm> +#include <ratio> #include <string> #include "Emper.hpp" @@ -19,9 +20,11 @@ Worker::Worker(workerid_t workerCount) unblockAffinitiesIo(std::vector<uint32_t>(workerCount)), lastStackUsage(32) {} -void Worker::recordBlockedContext(size_t blockedContextCount, BlockablePurpose blockablePurpose) { +void Worker::recordBlockedContextCount(size_t blockedContextCount) { maxBlockedContexts = std::max(maxBlockedContexts, blockedContextCount); +} +void Worker::recordBlockedContext(BlockablePurpose blockablePurpose) { switch (blockablePurpose) { case BlockablePurpose::GENERIC: blockedGeneric++; @@ -51,15 +54,33 @@ void Worker::recordStackUsage(size_t stackUsage) { lastStackUsage.push_back(stackUsage); } +void Worker::recordSleep(std::chrono::time_point<std::chrono::system_clock>& sleepStart, + std::chrono::time_point<std::chrono::system_clock>& sleepStop) { + std::chrono::duration<uint64_t, std::nano> sleepNanosDuration = sleepStop - sleepStart; + uint64_t sleepNanos = sleepNanosDuration.count(); + + totalSleepNanos += sleepNanos; + maxSleepNanos = std::max(maxSleepNanos, sleepNanos); + minSleepNanos = std::min(minSleepNanos, sleepNanos); + avgSleepNanos.update(sleepNanos); + + sleepCount++; +} + auto Worker::operator+=(const Worker& other) -> Worker& { - maxBlockedContexts = std::max(maxBlockedContexts, other.maxBlockedContexts); - blockedGeneric += other.blockedGeneric; - blockedIo += other.blockedIo; - - for (decltype(unblockAffinitiesGeneric)::size_type i = 0; i < unblockAffinitiesGeneric.size(); - i++) { - unblockAffinitiesGeneric[i] += other.unblockAffinitiesGeneric[i]; - unblockAffinitiesIo[i] += other.unblockAffinitiesIo[i]; + if constexpr (emper::STATS_BLOCKED_CONTEXT_COUNT) { + maxBlockedContexts = std::max(maxBlockedContexts, other.maxBlockedContexts); + } + + if constexpr (emper::STATS_BLOCKED_CONTEXT) { + blockedGeneric += other.blockedGeneric; + blockedIo += other.blockedIo; + + for (decltype(unblockAffinitiesGeneric)::size_type i = 0; i < unblockAffinitiesGeneric.size(); + i++) { + unblockAffinitiesGeneric[i] += other.unblockAffinitiesGeneric[i]; + unblockAffinitiesIo[i] += other.unblockAffinitiesIo[i]; + } } if constexpr (emper::STATS_STACK_USAGE_ENABLED) { @@ -72,21 +93,35 @@ auto Worker::operator+=(const Worker& other) -> Worker& { lastStackUsage.clear(); } + if constexpr (emper::STATS_WORKER_SLEEP) { + sleepCount += other.sleepCount; + totalSleepNanos += other.totalSleepNanos; + maxSleepNanos = std::max(maxSleepNanos, other.maxSleepNanos); + minSleepNanos = std::min(minSleepNanos, other.minSleepNanos); + avgSleepNanos.update(static_cast<uint64_t>(other.avgSleepNanos.getAverage())); + wakeupInVainCount += other.wakeupInVainCount; + } + return *this; } void Worker::print(std::ostream& out) { - out << "max-blocked-contexts: " << std::to_string(maxBlockedContexts) << std::endl - << "blocked-generic: " << std::to_string(blockedGeneric) << std::endl; - for (decltype(unblockAffinitiesGeneric)::size_type i = 0; i < unblockAffinitiesGeneric.size(); - ++i) { - out << "unblock-affinities-generic" << std::to_string(i) << ": " - << std::to_string(unblockAffinitiesGeneric[i]) << std::endl; + if constexpr (emper::STATS_BLOCKED_CONTEXT_COUNT) { + out << "max-blocked-contexts: " << std::to_string(maxBlockedContexts) << std::endl; } - out << "blocked-io: " << std::to_string(blockedIo) << std::endl; - for (decltype(unblockAffinitiesIo)::size_type i = 0; i < unblockAffinitiesIo.size(); ++i) { - out << "unblock-affinities-io" << std::to_string(i) << ": " - << std::to_string(unblockAffinitiesIo[i]) << std::endl; + + if constexpr (emper::STATS_BLOCKED_CONTEXT) { + out << "blocked-generic: " << std::to_string(blockedGeneric) << std::endl; + for (decltype(unblockAffinitiesGeneric)::size_type i = 0; i < unblockAffinitiesGeneric.size(); + ++i) { + out << "unblock-affinities-generic" << std::to_string(i) << ": " + << std::to_string(unblockAffinitiesGeneric[i]) << std::endl; + } + out << "blocked-io: " << std::to_string(blockedIo) << std::endl; + for (decltype(unblockAffinitiesIo)::size_type i = 0; i < unblockAffinitiesIo.size(); ++i) { + out << "unblock-affinities-io" << std::to_string(i) << ": " + << std::to_string(unblockAffinitiesIo[i]) << std::endl; + } } if constexpr (emper::STATS_STACK_USAGE_ENABLED) { @@ -102,6 +137,18 @@ void Worker::print(std::ostream& out) { out << std::endl; } } + + if constexpr (emper::STATS_WORKER_SLEEP) { + // clang-format off + out << "sleep-count: " << sleepCount << std::endl + << "wakeup-in-vain-count: " << wakeupInVainCount << std::endl + << "total-sleep-ns: " << totalSleepNanos << std::endl + << "max-sleep-ns: " << maxSleepNanos << std::endl + << "min-sleep-ns: " << minSleepNanos << std::endl + << "avg-sleep-ns: " << avgSleepNanos.getAverage() << std::endl + ; + // clang-format on + } } } // namespace emper::stats diff --git a/emper/stats/Worker.hpp b/emper/stats/Worker.hpp index d7e1092a41b206406c4192504a35c97230469a65..f302ddf9ef71d5606b29fdb4c758f4fa56312452 100644 --- a/emper/stats/Worker.hpp +++ b/emper/stats/Worker.hpp @@ -3,6 +3,7 @@ #pragma once #include <boost/circular_buffer.hpp> +#include <chrono> #include <cstddef> #include <cstdint> #include <iostream> @@ -16,6 +17,7 @@ enum class LogSubsystem; template <LogSubsystem logSubsystem> class Blockable; // IWYU pragma: keep class Context; +class Dispatcher; class Runtime; namespace emper::stats { @@ -32,8 +34,8 @@ class Worker { Worker(workerid_t workerCount); - void recordBlockedContext(size_t blockedContextCount, BlockablePurpose blockablePurpose); - + void recordBlockedContextCount(size_t blockedContextCount); + void recordBlockedContext(BlockablePurpose blockablePurpose); void recordUnblockedContext(workeraffinity_t workerAffinity, BlockablePurpose blockablePurpose); size_t maxStackUsage = 0; @@ -43,6 +45,17 @@ class Worker { void recordStackUsage(size_t stackUsage); + uint64_t sleepCount = 0; + uint64_t totalSleepNanos = 0; + uint64_t maxSleepNanos = 0; + uint64_t minSleepNanos = UINT64_MAX; + emper::lib::math::RunningAverage<double, uint64_t> avgSleepNanos; + + void recordSleep(std::chrono::time_point<std::chrono::system_clock>& sleepStart, + std::chrono::time_point<std::chrono::system_clock>& sleepStop); + + size_t wakeupInVainCount = 0; + auto operator+=(const Worker& other) -> Worker&; public: @@ -51,6 +64,7 @@ class Worker { template <LogSubsystem> friend class ::Blockable; friend class ::Context; + friend class ::Dispatcher; friend class ::Runtime; }; diff --git a/meson.build b/meson.build index 6ef7c078f701a26f69c6b4310a70c7bbe209f3b6..d08933072666fc92f59feb13559ef2a59ef0a4f2 100644 --- a/meson.build +++ b/meson.build @@ -58,6 +58,37 @@ ws_queue_scheduler = get_option('ws_queue_scheduler') assume_page_size = get_option('assume_page_size') assume_cache_line_size = get_option('assume_cache_line_size') +stats_all = get_option('stats_all') +stats_worker_sleep_option = get_option('stats_worker_sleep') +if stats_worker_sleep_option == 'true' or (stats_all and stats_worker_sleep_option == 'auto') + stats_worker_sleep = 'true' +else + stats_worker_sleep = 'false' +endif +stats_stack_usage_option = get_option('stats_stack_usage') +if stats_stack_usage_option == 'auto' + if stats_all + stats_stack_usage = 'efficient' + else + stats_stack_usage = 'disabled' + endif +else + stats_stack_usage = stats_stack_usage_option +endif +stats_blocked_context_option = get_option('stats_blocked_context') +if stats_blocked_context_option == 'true' or (stats_all and stats_blocked_context_option == 'auto') + stats_blocked_context = 'true' +else + stats_blocked_context = 'false' +endif +stats_blocked_context_count_option = get_option('stats_blocked_context_count') +if stats_blocked_context_count_option == 'true' or (stats_all and stats_blocked_context_count_option == 'auto') + stats_blocked_context_count = 'true' +else + stats_blocked_context_count = 'false' +endif + + conf_data.set('EMPER_WORKER_SLEEP', get_option('worker_sleep')) conf_data.set('EMPER_WORKER_WAKEUP_STRATEGY', get_option('worker_wakeup_strategy')) conf_data.set('EMPER_WORKER_IGNORE_WAKEUP_HINT', get_option('worker_ignore_wakeup_hint')) @@ -77,7 +108,10 @@ conf_data.set('EMPER_ASSUME_PAGE_SIZE', assume_page_size) conf_data.set('EMPER_MIN_CONTEXT_STACK_SIZE', get_option('min_context_stack_size')) conf_data.set('EMPER_ASSUME_CACHE_LINE_SIZE', assume_cache_line_size) conf_data.set('EMPER_STACK_GUARD_PAGE', get_option('stack_guard_page')) -conf_data.set('EMPER_STATS_STACK_USAGE', get_option('stats_stack_usage')) +conf_data.set('EMPER_STATS_STACK_USAGE', stats_stack_usage) +conf_data.set('EMPER_STATS_WORKER_SLEEP', stats_worker_sleep) +conf_data.set('EMPER_STATS_BLOCKED_CONTEXT', stats_blocked_context) +conf_data.set('EMPER_STATS_BLOCKED_CONTEXT_COUNT', stats_blocked_context_count) context_alignment = get_option('context_alignment') if context_alignment == 'none' diff --git a/meson_options.txt b/meson_options.txt index 9a5dfb73daee0df4b031e92711eb11de7e890551..8762050f217ce4ae936c5cc54ceb6c92a4ec6517 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -127,11 +127,51 @@ option( type: 'combo', choices: [ 'disabled', + 'auto', 'efficient', - 'accuraate', + 'accurate', ], - value: 'disabled', - description: 'Collect stack-usage stats via stack watermarking' + value: 'auto', + description: 'Collect stack-usage stats via stack watermarking', +) +option( + 'stats_worker_sleep', + type: 'combo', + choices: [ + 'false', + 'auto', + 'true', + ], + value: 'auto', + description: 'Collect stats about worker sleep', +) +option( + 'stats_blocked_context', + type: 'combo', + choices: [ + 'false', + 'auto', + 'true', + ], + value: 'auto', + description: 'Collect stats about blocked contexts', +) +option( + 'stats_blocked_context_count', + type: 'combo', + choices: [ + 'false', + 'auto', + 'true', + ], + value: 'auto', + description: 'Collect stats about number of blocked contexts', +) +option( + 'stats_all', + type: 'boolean', + value: false, + description: 'Enable the collection of all EMPER runtime-system stats', ) option( 'default_scheduling_strategy',