diff --git a/emper/Context.cpp b/emper/Context.cpp
index 4c696f5ab850dbbbb1520b4f083d5288172f5f14..2c8ac30ae653662f68a85a5cca03d87489859010 100644
--- a/emper/Context.cpp
+++ b/emper/Context.cpp
@@ -8,6 +8,8 @@
 #include <cstdint>
 #include <ostream>
 
+#include "stats/Worker.hpp"
+
 thread_local Context* Context::currentContext;
 
 auto Context::calcTos(char* context) -> void* {
@@ -147,7 +149,7 @@ void Context::unmap(void* sp) const {
 }
 
 void Context::maybeMarkStack() {
-	if constexpr (!emper::DEBUG) return;
+	if constexpr (!emper::DEBUG && !emper::STATS_STACK_USAGE_ENABLED) return;
 
 	// Write the stack full of 0xcc bytes, which just happen to be
 	// the 'int3' instruction, which will trigger a breakpoint
@@ -163,3 +165,17 @@ void Context::maybeMarkStack() {
 	res = memset(tos, 0xee, &context[CONTEXT_SIZE] - (char*)tos);
 	if (!res) DIE;
 }
+
+void Context::recordStackUsage() {
+	// Estimate this context's stack usage by scanning the 0xcc watermark upwards from bos.
+	constexpr uintptr_t stackMarker = 0xcccccccccccccccc;
+	// Only read whole words that lie inside the usable stack, so bosOffset never exceeds
+	// usableStackSize: no read past the end and no wrap-around in the subtraction below.
+	size_t bosOffset = 0;
+	for (; bosOffset + sizeof(stackMarker) <= usableStackSize; bosOffset += sizeof(stackMarker)) {
+		auto* stackWord = (uintptr_t*)((char*)bos + bosOffset);
+		if (*stackWord != stackMarker) break;
+	}
+	// NOTE(review): assumes emper::stats::worker is non-null on the calling thread — confirm.
+	emper::stats::worker->recordStackUsage(usableStackSize - bosOffset);
+}
diff --git a/emper/Context.hpp b/emper/Context.hpp
index d91702ac0752255fdd825e6b5ef5f466fa416e94..ec4bb37f3f09ff512555719c9302f272a58321d1 100644
--- a/emper/Context.hpp
+++ b/emper/Context.hpp
@@ -95,6 +95,8 @@ class EMPER_CONTEXT_ALIGNAS Context : Logger<LogSubsystem::C> {
 
 	void maybeMarkStack();
 
+	void recordStackUsage();  // Scans the stack watermark and reports the usage to the worker stats.
+
 	static auto calcTos(char* context) -> void*;
 	auto calcTos() -> void*;
 
diff --git a/emper/ContextManager.cpp b/emper/ContextManager.cpp
index ef044ea4fc67283b1da368f7ae20b38a5e741d93..7d99c1aac323436be172dea05237b517cebdc56c 100644
--- a/emper/ContextManager.cpp
+++ b/emper/ContextManager.cpp
@@ -23,15 +23,28 @@ auto ContextManager::getFreeContext() -> Context* {
 	if constexpr (!emper::CONTEXT_MANAGER_WITH_MEMORY_MANAGER) {
 		return new Context(this->runtime.dispatcher.getDispatchLoop());
 	}
+
 	bool malloced;
 	void* memory = getMemory(&malloced);
 	if (malloced) {
 		return new (memory) Context(this->runtime.dispatcher.getDispatchLoop());
 	}
-	return static_cast<Context*>(memory);
+
+	auto* context = static_cast<Context*>(memory);
+	if constexpr (emper::STATS_STACK_USAGE == emper::StatsStackUsage::accurate) {
+		// If we want accurate stack-usage stats, then we need to re-mark
+		// the stack whenever the memory manager hands us recycled memory,
+		// that is, memory that was not freshly malloced.
+		context->maybeMarkStack();
+	}
+	return context;
 }
 
 void ContextManager::putFreeContext(Context* context) {
+	if constexpr (emper::STATS_STACK_USAGE_ENABLED) {
+		context->recordStackUsage();
+	}
+
 	if constexpr (!emper::CONTEXT_MANAGER_WITH_MEMORY_MANAGER) {
 		putMemory(context);
 	} else {
diff --git a/emper/Emper.hpp b/emper/Emper.hpp
index 876429339de81c15eac612a45c8f7958059aa416..25c2c78c744a3f51a42caf9fb4c5d2eeadf1f444 100644
--- a/emper/Emper.hpp
+++ b/emper/Emper.hpp
@@ -25,13 +25,23 @@ void printInfo(std::ostream& strm = std::cout);
 const size_t WS_VICTIM_COUNT = EMPER_WS_VICTIM_COUNT;
 const size_t WS_VICTIM_DENOMINATOR = EMPER_WS_VICTIM_DENOMINATOR;
 
+enum class StatsStackUsage {  // Build-time mode of the stack-usage statistics.
+	disabled,  // No stack-usage stats are collected.
+	efficient,  // Stats are collected; recycled contexts are not re-marked.
+	accurate,  // Stats are collected; recycled contexts are re-marked before reuse.
+};
+// Mode selected via the EMPER_STATS_STACK_USAGE macro, which must expand to an enumerator name.
+const StatsStackUsage STATS_STACK_USAGE = StatsStackUsage::EMPER_STATS_STACK_USAGE;
+// True for every mode except 'disabled'; this also forces STATS to true.
+const bool STATS_STACK_USAGE_ENABLED = STATS_STACK_USAGE != StatsStackUsage::disabled;
+
 const bool STATS =
 #ifdef EMPER_STATS
 		true
 #else
 		false
 #endif
-		;
+		|| STATS_STACK_USAGE_ENABLED;
 
 template <typename C>
 void statsIncr(C& counter) {
diff --git a/emper/lib/util.cpp b/emper/lib/util.cpp
index 935750231356abe227cfd3e0674eaf4080e52187..2a0594b0fd21ff061b6cddf79ee428ef27ec4fbc 100644
--- a/emper/lib/util.cpp
+++ b/emper/lib/util.cpp
@@ -2,7 +2,6 @@
 // Copyright © 2022 Florian Schmaus
 #include "lib/util.hpp"
 
-#include <sstream>
 #include <utility>
 #include <vector>
 
@@ -20,4 +19,15 @@ auto bytesToHumanReadableString(uintptr_t bytes) -> std::string {
 	return sstrm.str();
 }
 
+void writeByteStats(uintptr_t bytes, std::ostream& out) {  // Writes bytes and its readable form.
+	const auto humanReadable = tu::toHumanReadableBytes(bytes);
+	out << bytes << " (" << humanReadable.first << " " << humanReadable.second << ")";
+}
+
+void writeByteStats(const std::string&& name, uintptr_t bytes, std::ostream& out) {
+	// Emits one "<name>: <byte stats>" line; std::endl also flushes the stream.
+	writeByteStats(bytes, out << name << ": ");
+	out << std::endl;
+}
+
 }	 // namespace emper::lib::util
diff --git a/emper/lib/util.hpp b/emper/lib/util.hpp
index 7be7de4c30c516491ff2414c17c59d18f73e1482..8b0b0b4c5db539ce0e24ca56e099903fdd9b6c8b 100644
--- a/emper/lib/util.hpp
+++ b/emper/lib/util.hpp
@@ -3,10 +3,15 @@
 #pragma once
 
 #include <cstdint>
+#include <ostream>
 #include <string>
 
 namespace emper::lib::util {
 
 auto bytesToHumanReadableString(uintptr_t bytes) -> std::string;
 
-}
+void writeByteStats(uintptr_t bytes, std::ostream& out);  // Writes "<bytes> (<readable form>)".
+// Same output, but prefixed with "<name>: " and terminated with std::endl.
+void writeByteStats(const std::string&& name, uintptr_t bytes, std::ostream& out);
+
+}	 // namespace emper::lib::util
diff --git a/emper/stats/Worker.cpp b/emper/stats/Worker.cpp
index ecfc444e12fa76ee34263821dc2cf6b6923ffb77..f60e884300bd40704aece7c8c3e04fdded9ff48c 100644
--- a/emper/stats/Worker.cpp
+++ b/emper/stats/Worker.cpp
@@ -1,17 +1,23 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
-// Copyright © 2021 Florian Schmaus
+// Copyright © 2021-2022 Florian Schmaus
 #include "Worker.hpp"
 
 #include <algorithm>
 #include <string>
 
-using namespace emper::stats;
+#include "Emper.hpp"
+#include "lib/util.hpp"
 
-thread_local Worker* emper::stats::worker = nullptr;
+namespace eu = emper::lib::util;
+
+namespace emper::stats {
+
+thread_local Worker* worker = nullptr;
 
 Worker::Worker(workerid_t workerCount)
 		: unblockAffinitiesGeneric(std::vector<uint32_t>(workerCount)),
-			unblockAffinitiesIo(std::vector<uint32_t>(workerCount)) {}
+			unblockAffinitiesIo(std::vector<uint32_t>(workerCount)),
+			lastStackUsage(32) {}  // Circular buffer with room for the 32 most recent samples.
 
 void Worker::recordBlockedContext(size_t blockedContextCount, BlockablePurpose blockablePurpose) {
 	maxBlockedContexts = std::max(maxBlockedContexts, blockedContextCount);
@@ -38,6 +44,13 @@ void Worker::recordUnblockedContext(workeraffinity_t workerAffinity,
 	}
 }
 
+void Worker::recordStackUsage(size_t stackUsage) {  // Folds one sample into the stack-usage stats.
+	lastStackUsage.push_back(stackUsage);
+	averageStackUsage.update(stackUsage);
+	if (stackUsage < minStackUsage) minStackUsage = stackUsage;
+	if (stackUsage > maxStackUsage) maxStackUsage = stackUsage;
+}
+
 auto Worker::operator+=(const Worker& other) -> Worker& {
 	maxBlockedContexts = std::max(maxBlockedContexts, other.maxBlockedContexts);
 	blockedGeneric += other.blockedGeneric;
@@ -48,6 +61,17 @@ auto Worker::operator+=(const Worker& other) -> Worker& {
 		unblockAffinitiesGeneric[i] += other.unblockAffinitiesGeneric[i];
 		unblockAffinitiesIo[i] += other.unblockAffinitiesIo[i];
 	}
+
+	if constexpr (emper::STATS_STACK_USAGE_ENABLED) {
+		maxStackUsage = std::max(maxStackUsage, other.maxStackUsage);
+		minStackUsage = std::min(minStackUsage, other.minStackUsage);
+		averageStackUsage.update(other.averageStackUsage.getAverage());
+
+		// We clear the history of last stack usages, as "adding" them
+		// together is not sensible.
+		lastStackUsage.clear();
+	}
+
 	return *this;
 }
 
@@ -64,4 +88,20 @@ void Worker::print(std::ostream& out) {
 		out << "unblock-affinities-io" << std::to_string(i) << ": "
 				<< std::to_string(unblockAffinitiesIo[i]) << std::endl;
 	}
+
+	if constexpr (emper::STATS_STACK_USAGE_ENABLED) {
+		eu::writeByteStats("max-stack-usage", maxStackUsage, out);
+		eu::writeByteStats("min-stack-usage", minStackUsage, out);
+		auto averageStackUsageInteger = static_cast<uintptr_t>(averageStackUsage.getAverage());
+		eu::writeByteStats("avg-stack-usage", averageStackUsageInteger, out);
+		if (!lastStackUsage.empty()) {
+			out << "last-stack-usage: ";
+			for (size_t stackUsage : lastStackUsage) {
+				eu::writeByteStats(stackUsage, out);
+			}
+			out << std::endl;
+		}
+	}
 }
+
+}	 // namespace emper::stats
diff --git a/emper/stats/Worker.hpp b/emper/stats/Worker.hpp
index f49a799d2753677092c10add95e562213d2bf238..d7e1092a41b206406c4192504a35c97230469a65 100644
--- a/emper/stats/Worker.hpp
+++ b/emper/stats/Worker.hpp
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: LGPL-3.0-or-later
-// Copyright © 2021 Florian Schmaus
+// Copyright © 2021-2022 Florian Schmaus
 #pragma once
 
+#include <boost/circular_buffer.hpp>
 #include <cstddef>
 #include <cstdint>
 #include <iostream>
@@ -9,10 +10,12 @@
 
 #include "BlockablePurpose.hpp"
 #include "emper-common.h"
+#include "lib/math.hpp"
 
 enum class LogSubsystem;
 template <LogSubsystem logSubsystem>
 class Blockable;	// IWYU pragma: keep
+class Context;
 class Runtime;
 
 namespace emper::stats {
@@ -33,6 +36,13 @@ class Worker {
 
 	void recordUnblockedContext(workeraffinity_t workerAffinity, BlockablePurpose blockablePurpose);
 
+	size_t maxStackUsage = 0;  // Largest recorded stack usage; stays 0 until a sample is recorded.
+	size_t minStackUsage = SIZE_MAX;  // Smallest recorded usage; SIZE_MAX until a sample is recorded.
+	emper::lib::math::RunningAverage<double, size_t> averageStackUsage;  // Average of the samples.
+	boost::circular_buffer<size_t> lastStackUsage;  // Most recent samples; cleared by operator+=.
+	// Records one stack-usage sample (in bytes) in the four fields above.
+	void recordStackUsage(size_t stackUsage);
+
 	auto operator+=(const Worker& other) -> Worker&;
 
  public:
@@ -40,6 +50,7 @@ class Worker {
 
 	template <LogSubsystem>
 	friend class ::Blockable;
+	friend class ::Context;
 	friend class ::Runtime;
 };
 
diff --git a/meson.build b/meson.build
index b1dc37d3ff7587f07111d11d1f0c3e54fcb0be9d..8928df3906f7fe17e07b3720cb2d359643fd2c87 100644
--- a/meson.build
+++ b/meson.build
@@ -76,6 +76,7 @@ conf_data.set('EMPER_ASSUME_PAGE_SIZE', assume_page_size)
 conf_data.set('EMPER_MIN_CONTEXT_STACK_SIZE', get_option('min_context_stack_size'))
 conf_data.set('EMPER_ASSUME_CACHE_LINE_SIZE', assume_cache_line_size)
 conf_data.set('EMPER_STACK_GUARD_PAGE', get_option('stack_guard_page'))
+conf_data.set('EMPER_STATS_STACK_USAGE', get_option('stats_stack_usage'))
 
 context_alignment = get_option('context_alignment')
 if context_alignment == 'none'
diff --git a/meson_options.txt b/meson_options.txt
index 91a3c666adc3805575185b6ddd6eac17a2fd0c3b..ecca5993f419bc395510b2dc768e88911464bf2e 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -101,6 +101,17 @@ option(
   value: false,
   description: 'Collect stats and print them at the end of the execution'
 )
+option(
+  'stats_stack_usage',
+  type: 'combo',
+  choices: [
+    'disabled',
+    'efficient',
+    'accurate',
+  ],
+  value: 'disabled',
+  description: 'Collect stack-usage stats via stack watermarking'
+)
 option(
   'default_scheduling_strategy',
   type: 'combo',