diff --git a/apps/meson.build b/apps/meson.build
index 22b360affd5ae5edb962550ca86cf053ffa02a17..6ee5a2d128dc6170326fd80af9597e80edcc8156 100644
--- a/apps/meson.build
+++ b/apps/meson.build
@@ -34,4 +34,10 @@ echoclient_exe = executable(
   dependencies: emper_dep,
+qsort = executable(  # Builds the 'qsort' benchmark binary from qsort.cpp against emper_dep.
+  'qsort',
+  'qsort.cpp',
+  dependencies: emper_dep,
diff --git a/apps/qsort.cpp b/apps/qsort.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8fa25ac192817f212b36199d5b2bb388e8408f87
--- /dev/null
+++ b/apps/qsort.cpp
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Copyright © 2021 Florian Fischer
+ * qsort benchmark implementation similar to those used for this blog post:
+ * https://zig.news/kprotty/resource-efficient-thread-pools-with-zig-3291
+ * Comparable benchmark sources (Rust, Go, Zig) can be found at:
+ * https://github.com/kprotty/zap/tree/blog/benchmarks
+ */
+#include <chrono>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include "CountingPrivateSemaphore.hpp"
+#include "Fiber.hpp"
+#include "Runtime.hpp"
+#include "emper.hpp"
+using std::chrono::duration_cast;
+using std::chrono::high_resolution_clock;
+using std::chrono::milliseconds;
+static void fill(int* arr, size_t s) {  // Writes the ascending sequence 0, 1, ..., s-1 into arr[0..s).
+	for (int i = 0; static_cast<size_t>(i) < s; ++i) {  // NOTE(review): int index overflows if s > INT_MAX
+		arr[i] = i;
+	}
+static void swap(int* n, int* m) {  // Exchanges the two ints that n and m point to.
+	int tmp = *n;  // going through a temporary keeps this correct when n == m (shuffle can pass i == j)
+	*n = *m;
+	*m = tmp;
+static void shuffle(int* arr, size_t s) {  // Permutes arr[0..s) in place using a fixed-seed pseudo-random sequence.
+	uint32_t xs = 0xdeadbeef;  // constant seed: every run produces the same permutation
+	for (size_t i = 0; i < s; ++i) {
+		xs ^= xs << 13;  // three shift/xor steps (13, 17, 5) advance the 32-bit generator state
+		xs ^= xs >> 17;
+		xs ^= xs << 5;
+		size_t j = xs % (i + 1);  // j lies in [0, i], so j == i (a self-swap) is possible
+		swap(&arr[i], &arr[j]);
+	}
+static auto verify(const int* arr, size_t s) -> bool {  // Returns true iff arr[0..s) is in non-decreasing order.
+	for (size_t i = 1; i < s; ++i) {  // starts at 1, so s == 0 and s == 1 fall through to `true`
+		if (arr[i - 1] > arr[i]) {
+			return false;  // an adjacent pair is out of order
+		}
+	}
+	return true;
+static void insertion_sort(int* arr, size_t s) {  // Sorts arr[0..s) into ascending order, in place.
+	for (size_t i = 1; i < s; i++) {  // arr[0..i) is already sorted when each iteration begins
+		size_t n = i;
+		while (n > 0 && arr[n] < arr[n - 1]) {  // strict <, so equal elements are never swapped
+			swap(&arr[n], &arr[n - 1]);  // move the element one slot towards the front
+			n -= 1;
+		}
+	}
+static auto partition(int* arr, size_t s) -> size_t {  // Partitions arr[0..s) around its last element; returns that element's final index.
+	size_t pivot = s - 1;  // NOTE(review): wraps around if s == 0; the only caller in this file passes s > 32
+	size_t i = 0;  // arr[0..i) collects the elements found so far that are <= the pivot value
+	for (size_t j = 0; j < pivot; ++j) {
+		if (arr[j] <= arr[pivot]) {
+			swap(&arr[i], &arr[j]);
+			i += 1;
+		}
+	}
+	swap(&arr[i], &arr[pivot]);  // afterwards: arr[0..i) <= arr[i] < arr[i+1..s)
+	return i;
+static void qsort(int* arr, size_t s) {  // Sorts arr[0..s) ascending; each partition's two sub-ranges are passed to spawn().
+	if (s <= 32) {  // small ranges are sorted directly, without partitioning or spawning
+		insertion_sort(arr, s);
+		return;
+	}
+	size_t mid = partition(arr, s);
+	CPS cps;  // presumably the semaphore type from CountingPrivateSemaphore.hpp — confirm
+	spawn([&] { qsort(arr, mid); }, cps);  // both lambdas capture this frame's locals by reference
+	spawn([&] { qsort(&arr[mid], s - mid); }, cps);  // right range starts at mid, so it includes the pivot element
+	cps.wait();  // NOTE(review): assumed to block until both spawned calls finished, keeping the captured locals valid — confirm
+static const size_t ARR_SIZE = 10 * 1000 * 1000;  // number of ints that main() allocates, shuffles and sorts
+auto main() -> int {  // Fills and shuffles ARR_SIZE ints, sorts them from a Fiber on a Runtime, prints the sort time, verifies the result.
+	int* arr = new int[ARR_SIZE];  // released by the delete[] at the end of main
+	std::cout << "filling" << std::endl;
+	fill(arr, ARR_SIZE);
+	std::cout << "shuffling" << std::endl;
+	shuffle(arr, ARR_SIZE);
+	Runtime runtime;
+	auto* sorter = Fiber::from([&]() {  // the lambda uses arr and runtime by reference
+		const auto start = std::chrono::steady_clock::now();  // only the qsort() call is timed, not fill/shuffle
+		qsort(arr, ARR_SIZE);
+		const auto end = std::chrono::steady_clock::now();
+		auto ms = duration_cast<milliseconds>(end - start);
+		std::cout << "sorting took " << ms.count() << "ms" << std::endl;
+		runtime.initiateTermination();  // presumably what lets waitUntilFinished() below return — confirm against Runtime
+	});
+	runtime.scheduleFromAnywhere(*sorter);
+	runtime.waitUntilFinished();
+	int exit_code = EXIT_SUCCESS;
+	if (!verify(arr, ARR_SIZE)) {  // checked only after waitUntilFinished() has returned
+		std::cerr << "Array is not sorted" << std::endl;
+		exit_code = EXIT_FAILURE;  // an unsorted result becomes a failing process exit status
+	}
+	delete[] arr;
+	return exit_code;