diff --git a/emper/Context.cpp b/emper/Context.cpp index 4c696f5ab850dbbbb1520b4f083d5288172f5f14..2c8ac30ae653662f68a85a5cca03d87489859010 100644 --- a/emper/Context.cpp +++ b/emper/Context.cpp @@ -8,6 +8,8 @@ #include <cstdint> #include <ostream> +#include "stats/Worker.hpp" + thread_local Context* Context::currentContext; auto Context::calcTos(char* context) -> void* { @@ -147,7 +149,7 @@ void Context::unmap(void* sp) const { } void Context::maybeMarkStack() { - if constexpr (!emper::DEBUG) return; + if constexpr (!emper::DEBUG && !emper::STATS_STACK_USAGE_ENABLED) return; // Write the stack full of 0xcc bytes, which just happen to be // the 'int3' instruction, which will trigger a breakpoint @@ -163,3 +165,17 @@ void Context::maybeMarkStack() { res = memset(tos, 0xee, &context[CONTEXT_SIZE] - (char*)tos); if (!res) DIE; } + +void Context::recordStackUsage() { + constexpr uintptr_t stackMarker = 0xcccccccccccccccc; + + size_t bosOffset; + for (bosOffset = 0; bosOffset < usableStackSize; bosOffset += sizeof(stackMarker)) { + auto* stackWord = (uintptr_t*)((char*)bos + bosOffset); + if (*stackWord != stackMarker) break; + } + + size_t stackUsage = usableStackSize - bosOffset; + + emper::stats::worker->recordStackUsage(stackUsage); +} diff --git a/emper/Context.hpp b/emper/Context.hpp index d91702ac0752255fdd825e6b5ef5f466fa416e94..ec4bb37f3f09ff512555719c9302f272a58321d1 100644 --- a/emper/Context.hpp +++ b/emper/Context.hpp @@ -95,6 +95,8 @@ class EMPER_CONTEXT_ALIGNAS Context : Logger<LogSubsystem::C> { void maybeMarkStack(); + void recordStackUsage(); + static auto calcTos(char* context) -> void*; auto calcTos() -> void*; diff --git a/emper/ContextManager.cpp b/emper/ContextManager.cpp index ef044ea4fc67283b1da368f7ae20b38a5e741d93..7d99c1aac323436be172dea05237b517cebdc56c 100644 --- a/emper/ContextManager.cpp +++ b/emper/ContextManager.cpp @@ -23,15 +23,28 @@ auto ContextManager::getFreeContext() -> Context* { if constexpr 
(!emper::CONTEXT_MANAGER_WITH_MEMORY_MANAGER) { return new Context(this->runtime.dispatcher.getDispatchLoop()); } + bool malloced; void* memory = getMemory(&malloced); if (malloced) { return new (memory) Context(this->runtime.dispatcher.getDispatchLoop()); } - return static_cast<Context*>(memory); + + auto* context = static_cast<Context*>(memory); + if constexpr (emper::STATS_STACK_USAGE == emper::StatsStackUsage::accurate) { + // If we want accurate stack-usage stats, then we need to re-mark + // the stack when the memory came from the memory manager and was + // not freshly malloced. + context->maybeMarkStack(); + } + return context; } void ContextManager::putFreeContext(Context* context) { + if constexpr (emper::STATS_STACK_USAGE_ENABLED) { + context->recordStackUsage(); + } + if constexpr (!emper::CONTEXT_MANAGER_WITH_MEMORY_MANAGER) { putMemory(context); } else { diff --git a/emper/Emper.hpp b/emper/Emper.hpp index 876429339de81c15eac612a45c8f7958059aa416..25c2c78c744a3f51a42caf9fb4c5d2eeadf1f444 100644 --- a/emper/Emper.hpp +++ b/emper/Emper.hpp @@ -25,13 +25,23 @@ void printInfo(std::ostream& strm = std::cout); const size_t WS_VICTIM_COUNT = EMPER_WS_VICTIM_COUNT; const size_t WS_VICTIM_DENOMINATOR = EMPER_WS_VICTIM_DENOMINATOR; +enum class StatsStackUsage { + disabled, + efficient, + accurate, +}; + +const StatsStackUsage STATS_STACK_USAGE = StatsStackUsage::EMPER_STATS_STACK_USAGE; + +const bool STATS_STACK_USAGE_ENABLED = STATS_STACK_USAGE != StatsStackUsage::disabled; + const bool STATS = #ifdef EMPER_STATS true #else false #endif - ; + || STATS_STACK_USAGE_ENABLED; template <typename C> void statsIncr(C& counter) { diff --git a/emper/lib/util.cpp b/emper/lib/util.cpp index 935750231356abe227cfd3e0674eaf4080e52187..2a0594b0fd21ff061b6cddf79ee428ef27ec4fbc 100644 --- a/emper/lib/util.cpp +++ b/emper/lib/util.cpp @@ -2,7 +2,6 @@ // Copyright © 2022 Florian Schmaus #include "lib/util.hpp" -#include <sstream> #include <utility> #include <vector> @@ -20,4 +19,15
@@ auto bytesToHumanReadableString(uintptr_t bytes) -> std::string { return sstrm.str(); } +void writeByteStats(uintptr_t bytes, std::ostream& out) { + auto res = tu::toHumanReadableBytes(bytes); + out << bytes << " (" << res.first << " " << res.second << ")"; +} + +void writeByteStats(const std::string& name, uintptr_t bytes, std::ostream& out) { + out << name << ": "; + writeByteStats(bytes, out); + out << std::endl; +} + } // namespace emper::lib::util diff --git a/emper/lib/util.hpp b/emper/lib/util.hpp index 7be7de4c30c516491ff2414c17c59d18f73e1482..8b0b0b4c5db539ce0e24ca56e099903fdd9b6c8b 100644 --- a/emper/lib/util.hpp +++ b/emper/lib/util.hpp @@ -3,10 +3,15 @@ #pragma once #include <cstdint> +#include <ostream> #include <string> namespace emper::lib::util { auto bytesToHumanReadableString(uintptr_t bytes) -> std::string; -} +void writeByteStats(uintptr_t bytes, std::ostream& out); + +void writeByteStats(const std::string& name, uintptr_t bytes, std::ostream& out); + +} // namespace emper::lib::util diff --git a/emper/stats/Worker.cpp b/emper/stats/Worker.cpp index ecfc444e12fa76ee34263821dc2cf6b6923ffb77..f60e884300bd40704aece7c8c3e04fdded9ff48c 100644 --- a/emper/stats/Worker.cpp +++ b/emper/stats/Worker.cpp @@ -1,17 +1,23 @@ // SPDX-License-Identifier: LGPL-3.0-or-later -// Copyright © 2021 Florian Schmaus +// Copyright © 2021-2022 Florian Schmaus #include "Worker.hpp" #include <algorithm> #include <string> -using namespace emper::stats; +#include "Emper.hpp" +#include "lib/util.hpp" -thread_local Worker* emper::stats::worker = nullptr; +namespace eu = emper::lib::util; + +namespace emper::stats { + +thread_local Worker* worker = nullptr; Worker::Worker(workerid_t workerCount) : unblockAffinitiesGeneric(std::vector<uint32_t>(workerCount)), - unblockAffinitiesIo(std::vector<uint32_t>(workerCount)) {} + unblockAffinitiesIo(std::vector<uint32_t>(workerCount)), + lastStackUsage(32) {} void Worker::recordBlockedContext(size_t blockedContextCount,
BlockablePurpose blockablePurpose) { maxBlockedContexts = std::max(maxBlockedContexts, blockedContextCount); @@ -38,6 +44,13 @@ void Worker::recordUnblockedContext(workeraffinity_t workerAffinity, } } +void Worker::recordStackUsage(size_t stackUsage) { + maxStackUsage = std::max(stackUsage, maxStackUsage); + minStackUsage = std::min(stackUsage, minStackUsage); + averageStackUsage.update(stackUsage); + lastStackUsage.push_back(stackUsage); +} + auto Worker::operator+=(const Worker& other) -> Worker& { maxBlockedContexts = std::max(maxBlockedContexts, other.maxBlockedContexts); blockedGeneric += other.blockedGeneric; @@ -48,6 +61,17 @@ auto Worker::operator+=(const Worker& other) -> Worker& { unblockAffinitiesGeneric[i] += other.unblockAffinitiesGeneric[i]; unblockAffinitiesIo[i] += other.unblockAffinitiesIo[i]; } + + if constexpr (emper::STATS_STACK_USAGE_ENABLED) { + maxStackUsage = std::max(maxStackUsage, other.maxStackUsage); + minStackUsage = std::min(minStackUsage, other.minStackUsage); + averageStackUsage.update(other.averageStackUsage.getAverage()); + + // We clear the history of last stack usages, as "adding" them + // together is not sensible. 
+ lastStackUsage.clear(); + } + return *this; } @@ -64,4 +88,20 @@ void Worker::print(std::ostream& out) { out << "unblock-affinities-io" << std::to_string(i) << ": " << std::to_string(unblockAffinitiesIo[i]) << std::endl; } + + if constexpr (emper::STATS_STACK_USAGE_ENABLED) { + eu::writeByteStats("max-stack-usage", maxStackUsage, out); + eu::writeByteStats("min-stack-usage", minStackUsage, out); + auto averageStackUsageInteger = static_cast<uintptr_t>(averageStackUsage.getAverage()); + eu::writeByteStats("avg-stack-usage", averageStackUsageInteger, out); + if (!lastStackUsage.empty()) { + out << "last-stack-usage: "; + for (size_t stackUsage : lastStackUsage) { + eu::writeByteStats(stackUsage, out); + } + out << std::endl; + } + } } + +} // namespace emper::stats diff --git a/emper/stats/Worker.hpp b/emper/stats/Worker.hpp index f49a799d2753677092c10add95e562213d2bf238..d7e1092a41b206406c4192504a35c97230469a65 100644 --- a/emper/stats/Worker.hpp +++ b/emper/stats/Worker.hpp @@ -1,7 +1,8 @@ // SPDX-License-Identifier: LGPL-3.0-or-later -// Copyright © 2021 Florian Schmaus +// Copyright © 2021-2022 Florian Schmaus #pragma once +#include <boost/circular_buffer.hpp> #include <cstddef> #include <cstdint> #include <iostream> @@ -9,10 +10,12 @@ #include "BlockablePurpose.hpp" #include "emper-common.h" +#include "lib/math.hpp" enum class LogSubsystem; template <LogSubsystem logSubsystem> class Blockable; // IWYU pragma: keep +class Context; class Runtime; namespace emper::stats { @@ -33,6 +36,13 @@ class Worker { void recordUnblockedContext(workeraffinity_t workerAffinity, BlockablePurpose blockablePurpose); + size_t maxStackUsage = 0; + size_t minStackUsage = SIZE_MAX; + emper::lib::math::RunningAverage<double, size_t> averageStackUsage; + boost::circular_buffer<size_t> lastStackUsage; + + void recordStackUsage(size_t stackUsage); + auto operator+=(const Worker& other) -> Worker&; public: @@ -40,6 +50,7 @@ class Worker { template <LogSubsystem> friend class ::Blockable; 
+ friend class ::Context; friend class ::Runtime; }; diff --git a/meson.build b/meson.build index b1dc37d3ff7587f07111d11d1f0c3e54fcb0be9d..8928df3906f7fe17e07b3720cb2d359643fd2c87 100644 --- a/meson.build +++ b/meson.build @@ -76,6 +76,7 @@ conf_data.set('EMPER_ASSUME_PAGE_SIZE', assume_page_size) conf_data.set('EMPER_MIN_CONTEXT_STACK_SIZE', get_option('min_context_stack_size')) conf_data.set('EMPER_ASSUME_CACHE_LINE_SIZE', assume_cache_line_size) conf_data.set('EMPER_STACK_GUARD_PAGE', get_option('stack_guard_page')) +conf_data.set('EMPER_STATS_STACK_USAGE', get_option('stats_stack_usage')) context_alignment = get_option('context_alignment') if context_alignment == 'none' diff --git a/meson_options.txt b/meson_options.txt index 91a3c666adc3805575185b6ddd6eac17a2fd0c3b..ecca5993f419bc395510b2dc768e88911464bf2e 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -101,6 +101,17 @@ option( value: false, description: 'Collect stats and print them at the end of the execution' ) +option( + 'stats_stack_usage', + type: 'combo', + choices: [ + 'disabled', + 'efficient', + 'accurate', + ], + value: 'disabled', + description: 'Collect stack-usage stats via stack watermarking' +) option( 'default_scheduling_strategy', type: 'combo',