From a2c02de99ec3bcb5d9f1acd0b7832b76d6abc162 Mon Sep 17 00:00:00 2001
From: Florian Fischer <florian.fischer@muhq.space>
Date: Wed, 29 Sep 2021 09:16:04 +0200
Subject: [PATCH] add gdb scripts to dump the runtime state

In a running gdb process use: source tools/gdb/dump_runtime_state.py
to dump the state of all threads, all WSL queues and all worker IoContexts.
---
NOTE(review): the line structure of this patch was reconstructed.
Indentation, blank context lines and the blob ids in the index lines are
best effort and must be confirmed against the tree (or regenerate the
patch with git format-patch) before applying.

Review changes:
- IoContext::getCqFlags() reads ring.cq.kflags instead of ring.sq.kflags
- print_io() tests the submitter pointer instead of the member's address
- dump() tests currentRuntime/currentWorker before dereferencing them
- cleanup() also runs when dump() raises
- docstrings added, typos fixed, builtin next() no longer shadowed

 .gitignore                       |   2 +
 emper/io/IoContext.cpp           |  27 ++++
 emper/io/IoContext.hpp           |  19 ++-
 tools/gdb/__init__.py            |   0
 tools/gdb/dump_runtime_state.py  | 254 +++++++++++++++++++++++++++++++
 tools/gdb/worker_frame_filter.py |  52 ++++++
 6 files changed, 353 insertions(+), 1 deletion(-)
 create mode 100644 tools/gdb/__init__.py
 create mode 100644 tools/gdb/dump_runtime_state.py
 create mode 100644 tools/gdb/worker_frame_filter.py

diff --git a/.gitignore b/.gitignore
index f9d14aec..be4498e2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@
 /.cache/
 /.clangd/
 
+tools/gdb/__pycache__/
+
 subprojects/packagecache/
 subprojects/googletest*
 subprojects/liburing*
diff --git a/emper/io/IoContext.cpp b/emper/io/IoContext.cpp
index 5dbeb52d..a11c88c2 100644
--- a/emper/io/IoContext.cpp
+++ b/emper/io/IoContext.cpp
@@ -524,4 +524,31 @@ IoContext::~IoContext() {
 
 	delete submitter;
 }
+
+auto IoContext::getSqHead() const -> unsigned { return *ring.sq.khead; }
+auto IoContext::getSqTail() const -> unsigned { return *ring.sq.ktail; }
+auto IoContext::getSqEntries() const -> unsigned { return *ring.sq.kring_entries; }
+auto IoContext::getSqFlags() const -> unsigned { return *ring.sq.kflags; }
+
+auto IoContext::getCqHead() const -> unsigned { return *ring.cq.khead; }
+auto IoContext::getCqHeadSafe() const -> unsigned {
+	return reinterpret_cast<std::atomic<unsigned> *>(ring.cq.khead)->load();
+}
+
+auto IoContext::getCqTail() const -> unsigned { return *ring.cq.ktail; }
+auto IoContext::getCqTailSafe() const -> unsigned {
+	return reinterpret_cast<std::atomic<unsigned> *>(ring.cq.ktail)->load();
+}
+
+auto IoContext::getCqEntries() const -> unsigned { return *ring.cq.kring_entries; }
+
+auto IoContext::getCqe(unsigned i) const -> struct io_uring_cqe {
+	const unsigned mask = *ring.cq.kring_mask;
+	const unsigned head = getCqHead();
+	return ring.cq.cqes[(head + i) & mask];
+}
+
+auto IoContext::getCqFlags() const -> unsigned {
+	return *ring.cq.kflags;
+}
 }	// namespace emper::io
diff --git a/emper/io/IoContext.hpp b/emper/io/IoContext.hpp
index 05fe4f8c..8aa792ee 100644
--- a/emper/io/IoContext.hpp
+++ b/emper/io/IoContext.hpp
@@ -2,7 +2,8 @@
 // Copyright © 2020-2021 Florian Fischer
 #pragma once
 
-#include <liburing.h> // for io_uring
+#include <liburing.h>
+#include <liburing/io_uring.h>
 
 #include <array>
 #include <atomic> // for atomic
@@ -48,6 +49,22 @@ class IoContext : public Logger<LogSubsystem::IO> {
 
 	friend class emper::sleep_strategy::PipeSleepStrategy;
 
+	// Debug functions to access the mmaped memory of ring.
+	// gdb is not allowed to access the io mmaped memory of the io_uring fd.
+	// https://stackoverflow.com/questions/67451177/why-cant-gdb-read-io-uring-cqe-contents
+	auto getSqHead() const -> unsigned;
+	auto getSqTail() const -> unsigned;
+	auto getSqEntries() const -> unsigned;
+	auto getSqFlags() const -> unsigned;
+
+	auto getCqHead() const -> unsigned;
+	auto getCqHeadSafe() const -> unsigned;
+	auto getCqTail() const -> unsigned;
+	auto getCqTailSafe() const -> unsigned;
+	auto getCqEntries() const -> unsigned;
+	auto getCqe(unsigned i) const -> struct io_uring_cqe;
+	auto getCqFlags() const -> unsigned;
+
  protected:
 	// Remember the Runtime which created the IoContext
 	Runtime &runtime;
diff --git a/tools/gdb/__init__.py b/tools/gdb/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/gdb/dump_runtime_state.py b/tools/gdb/dump_runtime_state.py
new file mode 100644
index 00000000..ae1a6d9e
--- /dev/null
+++ b/tools/gdb/dump_runtime_state.py
@@ -0,0 +1,254 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2021 Florian Fischer
+"""gdb python script to dump the state of the runtime"""
+
+from pathlib import Path
+import sys
+
+import gdb
+
+# Make the sibling module importable regardless of gdb's working directory.
+gdb_py_dir = Path(__file__).parent
+sys.path.append(str(gdb_py_dir))
+from worker_frame_filter import WorkerFrameFilter
+
+
+INDENTATION = ' '
+
+
+def setup():
+    """Prepare gdb for dumping.
+
+    Disables the pager, enables pretty printing and turns scheduler-locking
+    on so only the selected thread runs when gdb resumes the inferior.
+    """
+    gdb.execute('set pagination off')
+    gdb.execute('set print pretty')
+    gdb.execute('set scheduler-locking on')
+
+
+def cleanup():
+    """Reset the settings touched by setup().
+
+    The previous user settings are not remembered; pagination is switched
+    on, pretty printing and scheduler-locking are switched off.
+    """
+    gdb.execute('set pagination on')
+    gdb.execute('set print pretty off')
+    gdb.execute('set scheduler-locking off')
+
+
+def print_queue(queue, indentation=''):
+    """Print top, bottom and their difference of a work-stealing queue."""
+    print(f'{indentation}WSL:')
+    indentation += INDENTATION
+
+    top = queue['top']['_M_i']
+    print(f'{indentation}top: {top}')
+
+    bottom = queue['bottom']['_M_i']
+    print(f'{indentation}bottom: {bottom}')
+
+    print(f'{indentation}work: {bottom - top}')
+
+
+def print_sq(io, indentation=''):
+    """Print the submission queue state of an IoContext.
+
+    Only ring_ptr is read from io. The counters are obtained by calling the
+    debug getters on IoContext::workerIo of the selected thread, because gdb
+    can not read the mmaped ring memory itself.
+    """
+    print(f'{indentation}sq:')
+    indentation += INDENTATION
+    sq = io['ring']['sq']
+
+    print(f'{indentation}ring_ptr: {sq["ring_ptr"]}')
+    entries = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqEntries()")
+    print(f'{indentation}entries: {entries}')
+
+    flags = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqFlags()")
+    print(f'{indentation}flags: {flags}')
+
+    head = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqHead()")
+    print(f'{indentation}head: {head}')
+
+    tail = gdb.parse_and_eval("emper::io::IoContext::workerIo->getSqTail()")
+    print(f'{indentation}tail: {tail}')
+
+    unsubmitted = tail - head
+    print(f'{indentation}unsubmitted: {unsubmitted}')
+
+
+def print_cq(io, indentation=''):
+    """Print the completion queue state of an IoContext.
+
+    Only ring_ptr is read from io. The counters are obtained by calling the
+    debug getters on IoContext::workerIo of the selected thread, because gdb
+    can not read the mmaped ring memory itself.
+    """
+    print(f'{indentation}cq:')
+    indentation += INDENTATION
+    cq = io['ring']['cq']
+
+    print(f'{indentation}ring_ptr: {cq["ring_ptr"]}')
+    entries = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqEntries()")
+    print(f'{indentation}entries: {entries}')
+
+    flags = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqFlags()")
+    print(f'{indentation}flags: {flags}')
+
+    head = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqHeadSafe()")
+    print(f'{indentation}head: {head}')
+
+    tail = gdb.parse_and_eval("emper::io::IoContext::workerIo->getCqTailSafe()")
+    print(f'{indentation}tail: {tail}')
+
+    ready = tail - head
+    print(f'{indentation}ready: {ready}')
+
+
+def print_uring(io, indentation=''):
+    """Print the io_uring fd, flags and features followed by its SQ and CQ."""
+    uring = io['ring']
+    print(f'{indentation}io_uring:')
+    indentation += INDENTATION
+
+    print(f'{indentation}ring_fd: {uring["ring_fd"]}')
+    print(f'{indentation}flags: {uring["flags"]}')
+    print(f'{indentation}features: {uring["features"]}')
+
+    print_sq(io, indentation)
+    print_cq(io, indentation)
+
+
+def print_io(io, indentation=''):
+    """Print the members of an IoContext and the state of its io_uring."""
+    print(f'{indentation}IoContext:')
+    indentation += INDENTATION
+
+    print(f'{indentation}needsCqLock: {io["needsCqLock"]}')
+    print(f'{indentation}locklessCq: {io["locklessCq"]}')
+
+    print(f'{indentation}notificationEventFd: {io["notificationEventFd"]}')
+
+    # Test the pointer itself. The address of the member is never null, so
+    # testing it would never print the submitter.
+    submitter = io["submitter"]
+    if submitter:
+        print(f'{indentation}submitter: {submitter}')
+
+    print(f'{indentation}waitInflight: {io["waitInflight"]["_M_base"]["_M_i"]}')
+    print(f'{indentation}reqs_in_uring: {io["reqs_in_uring"]["_M_i"]}')
+    print(f'{indentation}preparedSqes: {io["preparedSqes"]}')
+    print(f'{indentation}uringFutureSet: {io["uringFutureSet"]["_set"]}')
+
+    print_uring(io, indentation=indentation)
+
+
+def print_anywhere_queue(runtime, indentation=''):
+    """Print the scheduler's scheduleAnywhereQueue of the runtime."""
+    print(f'{indentation}AnywhereQueue:')
+    indentation += INDENTATION
+    scheduler = runtime["scheduler"]
+    print(f'{indentation}{scheduler["scheduleAnywhereQueue"]["queue"]}')
+
+
+def print_stats(runtime, indentation=''):
+    """Call Runtime::printStats in the inferior and print gdb's output."""
+    print(f'{indentation}Stats:')
+    indentation += INDENTATION
+    print_stats_cmd = f'p ((Runtime*){runtime.address})->printStats(std::cout, false)'
+    stats_str = gdb.execute(print_stats_cmd, False, True)
+    for line in stats_str.splitlines():
+        print(f'{indentation}{line}')
+
+
+def print_bt():
+    """Print the backtrace of the selected thread."""
+    print(gdb.execute("bt", False, True))
+
+
+def _select_emper_frame():
+    """Select the innermost frame whose source file name contains 'emper'.
+
+    A thread may be in a frame from a non emper source file and thus gdb
+    can not use the emper namespace. Our solution is to walk up the stack
+    until we are in a emper function.
+
+    Returns True if such a frame got selected and False otherwise.
+    """
+    frame = gdb.selected_frame()
+    while frame is not None:
+        symtab = frame.find_sal().symtab
+        if symtab and 'emper' in symtab.filename:
+            return True
+
+        frame = frame.older()
+        if frame is not None:
+            frame.select()
+
+    return False
+
+
+def dump():
+    """Dump the state of all threads of the selected inferior.
+
+    The AnywhereQueue and the runtime stats are printed once. For each
+    worker its backtrace, work-stealing queue and IoContext are printed.
+    """
+    printed_once = False
+
+    for thread in gdb.selected_inferior().threads():
+        thread.switch()
+
+        if not _select_emper_frame():
+            print(f"Thread {thread.name} found not executing any emper code")
+            print_bt()
+            continue
+
+        # Test the pointer itself: truth-testing the dereferenced object
+        # does not detect a null pointer.
+        runtime_ptr = gdb.parse_and_eval("Runtime::currentRuntime")
+        if not runtime_ptr:
+            print(f"Thread {thread} found without an active runtime")
+            print_bt()
+            continue
+        runtime = runtime_ptr.dereference()
+
+        if not printed_once:
+            print_anywhere_queue(runtime)
+            print()
+            print_stats(runtime)
+            print()
+            printed_once = True
+
+        worker_ptr = gdb.parse_and_eval("Worker::currentWorker")
+        if not worker_ptr:
+            print(f"Non worker thread {thread.name}")
+            print_bt()
+            continue
+        worker = worker_ptr.dereference()
+
+        worker_id = worker['workerId']
+        print(f"## Worker {worker_id} ##")
+        print_bt()
+
+        queue = gdb.parse_and_eval("AbstractWorkStealingScheduler::queue")
+        print_queue(queue)
+        print()
+
+        io = gdb.parse_and_eval("emper::io::IoContext::workerIo").dereference()
+        print_io(io)
+        print()
+
+
+if __name__ == '__main__':
+    # install frame filter
+    WorkerFrameFilter()
+    setup()
+    try:
+        dump()
+    finally:
+        # Leave gdb usable even if dumping raised an error.
+        cleanup()
diff --git a/tools/gdb/worker_frame_filter.py b/tools/gdb/worker_frame_filter.py
new file mode 100644
index 00000000..92558fe8
--- /dev/null
+++ b/tools/gdb/worker_frame_filter.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# Copyright 2021 Florian Fischer
+"""gdb frame filter hiding the broken frames on EMPER worker stacks"""
+
+import gdb
+
+
+class WorkerFrameFilter():
+    """Frame filter wrapping the frame iterator into a WorkerFrameIterator"""
+
+    def __init__(self):
+        """Register this filter, enabled, in gdb.frame_filters."""
+        self.name = "WorkerFrameFilter"
+        self.priority = 100
+        self.enabled = True
+        gdb.frame_filters[self.name] = self
+
+    def filter(self, frame_iter):
+        """Return frame_iter wrapped into a WorkerFrameIterator."""
+        return WorkerFrameIterator(frame_iter)
+
+
+class WorkerFrameIterator:
+    """Frame iterator skipping each frame without a valid name and function
+
+    This is useful in EMPER because worker stacks are allocated by emper and not glibc
+    and are not properly walked by gdb resulting in multiple 'broken' frames above
+    the dispatch loop.
+    """
+
+    def __init__(self, ii):
+        """Wrap the frame decorator iterator ii."""
+        self.input_iterator = ii
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        """Return the next frame decorator whose frame has a name or function.
+
+        StopIteration of the wrapped iterator ends this iteration as well.
+        """
+        while True:
+            decorator = next(self.input_iterator)
+            frame = decorator.inferior_frame()
+
+            if frame.name() or frame.function():
+                return decorator
+
+
+if __name__ == '__main__':
+    WorkerFrameFilter()
-- 
GitLab