From a2c02de99ec3bcb5d9f1acd0b7832b76d6abc162 Mon Sep 17 00:00:00 2001
From: Florian Fischer <florian.fischer@muhq.space>
Date: Wed, 29 Sep 2021 09:16:04 +0200
Subject: [PATCH] add gdb scripts to dump the runtime state

In a running gdb process use:

source tools/gdb/dump_runtime_state.py

to dump the state of all threads, all WSL queues and all worker IoContexts.
---
 .gitignore                       |   2 +
 emper/io/IoContext.cpp           |  27 ++++
 emper/io/IoContext.hpp           |  19 ++-
 tools/gdb/__init__.py            |   0
 tools/gdb/dump_runtime_state.py  | 213 +++++++++++++++++++++++++++++++
 tools/gdb/worker_frame_filter.py |  40 ++++++
 6 files changed, 300 insertions(+), 1 deletion(-)
 create mode 100644 tools/gdb/__init__.py
 create mode 100644 tools/gdb/dump_runtime_state.py
 create mode 100644 tools/gdb/worker_frame_filter.py

diff --git a/.gitignore b/.gitignore
index f9d14aec..be4498e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@
 /.cache/
 /.clangd/
 
+tools/gdb/__pycache__/
+
 subprojects/packagecache/
 subprojects/googletest*
 subprojects/liburing*
diff --git a/emper/io/IoContext.cpp b/emper/io/IoContext.cpp
index 5dbeb52d..a11c88c2 100644
--- a/emper/io/IoContext.cpp
+++ b/emper/io/IoContext.cpp
@@ -524,4 +524,31 @@ IoContext::~IoContext() {
 
 	delete submitter;
 }
+
+auto IoContext::getSqHead() const -> unsigned { return *ring.sq.khead; }
+auto IoContext::getSqTail() const -> unsigned { return *ring.sq.ktail; }
+auto IoContext::getSqEntries() const -> unsigned { return *ring.sq.kring_entries; }
+auto IoContext::getSqFlags() const -> unsigned { return *ring.sq.kflags; }
+
+auto IoContext::getCqHead() const -> unsigned { return *ring.cq.khead; }
+auto IoContext::getCqHeadSafe() const -> unsigned {
+	return reinterpret_cast<std::atomic<unsigned> *>(ring.cq.khead)->load();
+}
+
+auto IoContext::getCqTail() const -> unsigned { return *ring.cq.ktail; }
+auto IoContext::getCqTailSafe() const -> unsigned {
+	return reinterpret_cast<std::atomic<unsigned> *>(ring.cq.ktail)->load();
+}
+
+auto IoContext::getCqEntries() const -> unsigned { return *ring.cq.kring_entries; }
+
+auto IoContext::getCqe(unsigned i) const -> struct io_uring_cqe {
+	const unsigned mask = *ring.cq.kring_mask;
+	const unsigned head = getCqHead();
+	return ring.cq.cqes[(head + i) & mask];
+}
+
+auto IoContext::getCqFlags() const -> unsigned {
+	return *ring.cq.kflags;  // completion-queue flags word (was mistakenly reading the SQ's)
+}
 }	 // namespace emper::io
diff --git a/emper/io/IoContext.hpp b/emper/io/IoContext.hpp
index 05fe4f8c..8aa792ee 100644
--- a/emper/io/IoContext.hpp
+++ b/emper/io/IoContext.hpp
@@ -2,7 +2,8 @@
 // Copyright © 2020-2021 Florian Fischer
 #pragma once
 
-#include <liburing.h>	 // for io_uring
+#include <liburing.h>
+#include <liburing/io_uring.h>
 
 #include <array>
 #include <atomic>			 // for atomic
@@ -48,6 +49,22 @@ class IoContext : public Logger<LogSubsystem::IO> {
 
 	friend class emper::sleep_strategy::PipeSleepStrategy;
 
+	// Debug accessors for the mmapped memory of ring.
+	// gdb cannot read the mmapped memory of the io_uring fd itself, so it calls these.
+	// https://stackoverflow.com/questions/67451177/why-cant-gdb-read-io-uring-cqe-contents
+	auto getSqHead() const -> unsigned;
+	auto getSqTail() const -> unsigned;
+	auto getSqEntries() const -> unsigned;
+	auto getSqFlags() const -> unsigned;
+
+	auto getCqHead() const -> unsigned;
+	auto getCqHeadSafe() const -> unsigned;
+	auto getCqTail() const -> unsigned;
+	auto getCqTailSafe() const -> unsigned;
+	auto getCqEntries() const -> unsigned;
+	auto getCqe(unsigned i) const -> struct io_uring_cqe;
+	auto getCqFlags() const -> unsigned;
+
  protected:
 	// Remember the Runtime which created the IoContext
 	Runtime &runtime;
diff --git a/tools/gdb/__init__.py b/tools/gdb/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/gdb/dump_runtime_state.py b/tools/gdb/dump_runtime_state.py
new file mode 100644
index 00000000..ae1a6d9e
--- /dev/null
+++ b/tools/gdb/dump_runtime_state.py
@@ -0,0 +1,213 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2021 Florian Fischer
+"""gdb python script to dump the state of the runtime"""
+
+from pathlib import Path
+import sys
+
+import gdb
+
+gdb_py_dir = Path(__file__).parent
+sys.path.append(str(gdb_py_dir))
+from worker_frame_filter import WorkerFrameFilter
+
+
+INDENTATION = '  '
+
+def setup():
+    gdb.execute('set pagination off')
+    gdb.execute('set print pretty')
+    gdb.execute('set scheduler-locking on')
+
+
+def cleanup():
+    gdb.execute('set pagination on')
+    gdb.execute('set print pretty off')
+    gdb.execute('set scheduler-locking off')
+
+
+def print_queue(queue, indentation=''):
+    print(f'{indentation}WSL:')
+    indentation += INDENTATION
+
+    top = queue['top']['_M_i']
+    print(f'{indentation}top: {top}')
+
+    bottom = queue['bottom']['_M_i']
+    print(f'{indentation}bottom: {bottom}')
+
+    print(f'{indentation}work: {bottom - top}')
+
+
+def print_sq(io, indentation=''):
+    print(f'{indentation}sq:')
+    indentation += INDENTATION
+    sq = io['ring']['sq']
+
+    print(f'{indentation}ring_ptr: {sq["ring_ptr"]}')
+    entries = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqEntries()")
+    print(f'{indentation}entries: {entries}')
+
+    flags = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqFlags()")
+    print(f'{indentation}flags: {flags}')
+
+    head = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqHead()")
+    print(f'{indentation}head: {head}')
+
+    tail = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqTail()")
+    print(f'{indentation}tail: {tail}')
+
+    unsubmitted = tail - head
+    print(f'{indentation}unsubmitted: {unsubmitted}')
+
+    # print(sq)
+
+
+def print_cq(io, indentation=''):
+    print(f'{indentation}cq:')
+    indentation += INDENTATION
+    cq = io['ring']['cq']
+
+    print(f'{indentation}ring_ptr: {cq["ring_ptr"]}')
+    entries = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqEntries()")
+    print(f'{indentation}entries: {entries}')
+
+    flags = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqFlags()")
+    print(f'{indentation}flags: {flags}')
+
+    head = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqHeadSafe()")
+    print(f'{indentation}head: {head}')
+
+    tail = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqTailSafe()")
+    print(f'{indentation}tail: {tail}')
+
+    ready = tail - head
+    print(f'{indentation}ready: {ready}')
+
+    # print(io['ring']['cq'])
+
+
+def print_uring(io, indentation=''):
+    uring = io['ring']
+    print(f'{indentation}io_uring:')
+    indentation += INDENTATION
+
+    print(f'{indentation}ring_fd: {uring["ring_fd"]}')
+    print(f'{indentation}flags: {uring["flags"]}')
+    print(f'{indentation}features: {uring["features"]}')
+
+    print_sq(io, indentation)
+    print_cq(io, indentation)
+
+
+def print_io(io, indentation=''):
+    print(f'{indentation}IoContext:')  # dump one IoContext gdb.Value, then its io_uring
+    indentation += INDENTATION
+
+    print(f'{indentation}needsCqLock: {io["needsCqLock"]}')
+    print(f'{indentation}locklessCq: {io["locklessCq"]}')
+
+    print(f'{indentation}notificationEventFd: {io["notificationEventFd"]}')
+
+    submitter = io["submitter"]
+    if submitter:  # test the pointer value itself; the member's own address is never null
+        print(f'{indentation}submitter: {submitter}')
+
+    print(f'{indentation}waitInflight: {io["waitInflight"]["_M_base"]["_M_i"]}')
+    # print(f'{indentation}{io["CQE_BATCH_SIZE"]}')
+    print(f'{indentation}reqs_in_uring: {io["reqs_in_uring"]["_M_i"]}')
+    print(f'{indentation}preparedSqes: {io["preparedSqes"]}')
+    print(f'{indentation}uringFutureSet: {io["uringFutureSet"]["_set"]}')
+
+    print_uring(io, indentation=indentation)
+
+
+def print_anywhere_queue(runtime, indentation=''):
+    print(f'{indentation}AnywhereQueue:')
+    indentation += INDENTATION
+    scheduler = runtime["scheduler"]
+    print(f'{indentation}{scheduler["scheduleAnywhereQueue"]["queue"]}')
+
+
+def print_stats(runtime, indentation=''):
+    print(f'{indentation}Stats:')
+    indentation += INDENTATION
+    print_stats_cmd = f'p ((Runtime*){runtime.address})->printStats(std::cout, false)'
+    stats_str = gdb.execute(print_stats_cmd, False, True)
+    for l in stats_str.splitlines():
+        print(f'{indentation}{l}')
+
+
+def print_bt():
+    print(gdb.execute("bt", False, True))
+
+
+def dump():
+    printed_once = False  # runtime-wide state (anywhere queue, stats) is printed only once
+
+    threads = gdb.selected_inferior().threads()
+    for thread in threads:
+        # switch to thread
+        thread.switch()
+
+        # A thread may be in a frame from a non emper source file and
+        # thus gdb can not use the emper namespace.
+        # Our solution is to walk up the stack until we are in an emper function
+        emper_found = False
+        cur = gdb.selected_frame()
+        while True:
+            cur_sym_tab = cur.find_sal().symtab
+            if cur_sym_tab and 'emper' in cur_sym_tab.filename:
+                emper_found = True
+                break
+
+            older = cur.older()
+            if not older:
+                break
+            older.select()
+            cur = older
+
+        if not emper_found:
+            print(f"Thread {thread.name} found not executing any emper code")
+            print_bt()
+            continue
+
+        runtime = gdb.parse_and_eval("Runtime::currentRuntime").dereference()
+        if runtime.address == 0:  # a struct gdb.Value is always truthy; test the pointer
+            print(f"Thread {thread} found without an active runtime")
+            print_bt()
+            continue
+
+        if not printed_once:
+            print_anywhere_queue(runtime)
+            print()
+            print_stats(runtime)
+            print()
+            printed_once = True
+
+        worker = gdb.parse_and_eval("Worker::currentWorker").dereference()
+        if worker.address == 0:
+            print(f"Non worker thread {thread.name}")
+            print_bt()
+            continue
+
+        worker_id = worker['workerId']
+        print(f"## Worker {worker_id} ##")
+        print_bt()
+
+        queue = gdb.parse_and_eval("AbstractWorkStealingScheduler::queue")
+        print_queue(queue)
+        print()
+
+        io = gdb.parse_and_eval("emper::io::IoContext::workerIo").dereference()
+        print_io(io)
+        print()
+
+
+
+if __name__ == '__main__':
+    # install frame filter
+    WorkerFrameFilter()
+    setup()
+    dump()
+    cleanup()
diff --git a/tools/gdb/worker_frame_filter.py b/tools/gdb/worker_frame_filter.py
new file mode 100644
index 00000000..92558fe8
--- /dev/null
+++ b/tools/gdb/worker_frame_filter.py
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2021 Florian Fischer
+import gdb
+
+class WorkerFrameFilter():
+    """Frame filter wrapping the frame iterator in a WorkerFrameIterator"""
+    def __init__(self):
+        self.name = "WorkerFrameFilter"
+        self.priority = 100
+        self.enabled = True
+        gdb.frame_filters[self.name] = self  # register this filter with gdb under its name
+
+    def filter(self, frame_iter):
+        return WorkerFrameIterator(frame_iter)
+
+
+class WorkerFrameIterator:
+    """Frame iterator skipping each frame that has neither a valid name nor a function
+
+    This is useful in EMPER because worker stacks are allocated by emper and not glibc
+    and are not properly walked by gdb resulting in multiple 'broken' frames above
+    the dispatch loop.
+    """
+    def __init__(self, ii):
+        self.input_iterator = ii
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        while True:
+            frameDecorator = next(self.input_iterator)  # StopIteration from here ends iteration
+            frame = frameDecorator.inferior_frame()
+
+            if frame.name() or frame.function():
+                return frameDecorator
+
+
+if __name__ == '__main__':
+    WorkerFrameFilter()
-- 
GitLab