From ddcb268884b0342f5bb3b4961b0bf5d5b0840bd7 Mon Sep 17 00:00:00 2001
From: Florian Fischer <florian.fischer@muhq.space>
Date: Tue, 17 May 2022 10:58:00 +0200
Subject: [PATCH] add io_uring batch variant

Queue-based interfaces are able to spread the syscall overhead across
multiple requests.
Add a new io_uring variant submitting 10 read requests with a single
io_uring_enter call.

Adapt bench.c and all other variants to support variable numbers of
read requests issued by do_read(...).
---
 Makefile              |  4 +++-
 bench.c               | 16 +++++++++++-----
 blocking.c            |  3 ++-
 epoll.c               |  3 ++-
 io_uring.c            |  3 ++-
 io_uring_batch.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 io_uring_no_syscall.c |  3 ++-
 io_uring_sqpoll.c     |  3 ++-
 linux_aio.c           |  3 ++-
 paio_sig.c            |  3 ++-
 paio_thrd.c           |  3 ++-
 11 files changed, 70 insertions(+), 14 deletions(-)
 create mode 100644 io_uring_batch.c

diff --git a/Makefile b/Makefile
index 8e77a20..8b5ec8f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,7 @@
 BENCH_MAIN := bench.c
-SYSCALLS := blocking io_uring io_uring_sqpoll io_uring_no_syscall epoll paio_sig paio_thrd linux_aio
+SYSCALLS := blocking epoll \
+            paio_sig paio_thrd linux_aio \
+            io_uring io_uring_batch io_uring_sqpoll io_uring_no_syscall 
 
 OBJ := $(addprefix bench-,$(SYSCALLS))
 
diff --git a/bench.c b/bench.c
index 9004db3..ea046c1 100644
--- a/bench.c
+++ b/bench.c
@@ -13,7 +13,7 @@
 #include "stopwatch.h"
 
 void init(int fd);
-void do_read(int fd, void *buf, size_t count);
+unsigned do_read(int fd, void *buf, size_t count);
 
 unsigned warmup = 10000;
 unsigned iterations = 1000000;
@@ -73,10 +73,16 @@ int main(int argc, char *argv[]) {
 
 	uint64_t proc_time_start = get_proc_time();
 
-	for (unsigned i = 1; i <= exp_iterations; ++i) {
-		do_read(fd, &read_buf, sizeof(read_buf));
-		nanos[i - 1] = clock_diff_nanos();
-		cycles[i - 1] = clock_diff_cycles();
+	unsigned i = 1;
+	while (i <= exp_iterations) {
+		unsigned done_reads = do_read(fd, &read_buf, sizeof(read_buf));
+		uint64_t nanos_diff = clock_diff_nanos() / done_reads;
+		uint64_t cycles_diff = clock_diff_cycles() / done_reads;
+		for (; done_reads > 0; --done_reads) {
+			nanos[i - 1] = nanos_diff;
+			cycles[i - 1] = cycles_diff;
+			++i;
+		}
 	}
 	uint64_t proc_time_end = get_proc_time();
 	uint64_t proc_time_used = proc_time_end - proc_time_start;
diff --git a/blocking.c b/blocking.c
index fddb88a..f4e7ae6 100644
--- a/blocking.c
+++ b/blocking.c
@@ -6,10 +6,11 @@
 
 void init(__attribute__((unused)) int fd) {}
 
-void do_read(int fd, void *buf, size_t count) {
+unsigned do_read(int fd, void *buf, size_t count) {
 	start_watch();
 	ssize_t res = read(fd, buf, count);
 	stop_watch();
 
 	if (res == -1) err(EXIT_FAILURE, "read failed");
+	return 1;
 }
diff --git a/epoll.c b/epoll.c
index 8ec03ef..3a57e62 100644
--- a/epoll.c
+++ b/epoll.c
@@ -18,7 +18,7 @@ void init(int fd) {
 	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) err(EXIT_FAILURE, "epoll_ctl failed");
 }
 
-void do_read(int fd, void *buf, size_t count) {
+unsigned do_read(int fd, void *buf, size_t count) {
 	start_watch();
 	nfds = epoll_wait(epollfd, &ev, 1, -1);
 	size_t res = read(fd, buf, count);
@@ -29,4 +29,5 @@ void do_read(int fd, void *buf, size_t count) {
 	if (ev.data.fd != fd) errx(EXIT_FAILURE, "got unexpected fd from epoll");
 
 	if (res == -1) err(EXIT_FAILURE, "read failed");
+	return 1;
 }
diff --git a/io_uring.c b/io_uring.c
index 20cf4de..101544e 100644
--- a/io_uring.c
+++ b/io_uring.c
@@ -15,7 +15,7 @@ void init(__attribute__((unused)) int fd) {
 	}
 }
 
-void do_read(int fd, void *buf, size_t count) {
+unsigned do_read(int fd, void *buf, size_t count) {
 	start_watch();
 
 	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
@@ -32,4 +32,5 @@ void do_read(int fd, void *buf, size_t count) {
 	if (res < 0) err(EXIT_FAILURE, "io_uring_peek_cqe failed");
 
 	if (cqe->res < 0) err(EXIT_FAILURE, "read request failed");
+	return 1;
 }
diff --git a/io_uring_batch.c b/io_uring_batch.c
new file mode 100644
index 0000000..552c5e3
--- /dev/null
+++ b/io_uring_batch.c
@@ -0,0 +1,40 @@
+#include <err.h>
+#include <errno.h>
+#include <liburing.h>
+#include <stdlib.h>
+
+#include "stopwatch.h"
+
+struct io_uring ring;
+
+static const unsigned BATCH_SIZE = 10;
+
+void init(__attribute__((unused)) int fd) {
+	/* Create the global ring, requesting BATCH_SIZE entries. */
+	const int rc = io_uring_queue_init(BATCH_SIZE, &ring, 0);
+	if (rc >= 0) return;
+	errno = -rc; /* a negative result is stored negated in errno, which err() reports */
+	err(EXIT_FAILURE, "io_uring_setup failed");
+}
+
+/* Submits BATCH_SIZE reads with one io_uring_submit_and_wait; returns BATCH_SIZE. */
+unsigned do_read(int fd, void *buf, size_t count) {
+	start_watch();
+	for (unsigned i = 0; i < BATCH_SIZE; ++i) {
+		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
+		io_uring_prep_read(sqe, fd, buf, count, 0);
+	}
+	int res = io_uring_submit_and_wait(&ring, BATCH_SIZE);
+	stop_watch();
+	if (res < 0) err(EXIT_FAILURE, "io_uring_submit_and_wait failed");
+
+	/* Check and retire every completion so the queue is empty for the next batch. */
+	for (unsigned i = 0; i < BATCH_SIZE; ++i) {
+		struct io_uring_cqe *cqe;
+		res = io_uring_peek_cqe(&ring, &cqe);
+		if (res < 0) err(EXIT_FAILURE, "io_uring_peek_cqe failed");
+		if (cqe->res < 0) err(EXIT_FAILURE, "read request failed");
+		io_uring_cqe_seen(&ring, cqe);
+	}
+	return BATCH_SIZE;
+}
diff --git a/io_uring_no_syscall.c b/io_uring_no_syscall.c
index f4ab18b..6c65127 100644
--- a/io_uring_no_syscall.c
+++ b/io_uring_no_syscall.c
@@ -11,7 +11,7 @@ struct io_uring ring;
 
 void init(__attribute__((unused)) int fd) { io_uring_init_sqpoll(&ring); }
 
-void do_read(int fd, void *buf, size_t count) {
+unsigned do_read(int fd, void *buf, size_t count) {
 	struct io_uring_sqe *sqe = sqpoll_ring_get_sqe(&ring);
 	io_uring_prep_read(sqe, fd, buf, count, 0);
 
@@ -27,4 +27,5 @@ void do_read(int fd, void *buf, size_t count) {
 	if (res < 0) err(EXIT_FAILURE, "io_submit failed");
 
 	if (cqe->res < 0) err(EXIT_FAILURE, "read request failed");
+	return 1;
 }
diff --git a/io_uring_sqpoll.c b/io_uring_sqpoll.c
index 61230ff..15a5cc0 100644
--- a/io_uring_sqpoll.c
+++ b/io_uring_sqpoll.c
@@ -11,7 +11,7 @@ struct io_uring ring;
 
 void init(__attribute__((unused)) int fd) { io_uring_init_sqpoll(&ring); }
 
-void do_read(int fd, void *buf, size_t count) {
+unsigned do_read(int fd, void *buf, size_t count) {
 	struct io_uring_sqe *sqe = sqpoll_ring_get_sqe(&ring);
 	io_uring_prep_read(sqe, fd, buf, count, 0);
 
@@ -29,4 +29,5 @@ void do_read(int fd, void *buf, size_t count) {
 	if (res < 0) err(EXIT_FAILURE, "io_uring_peek_cqe failed");
 
 	if (cqe->res < 0) err(EXIT_FAILURE, "read request failed");
+	return 1;
 }
diff --git a/linux_aio.c b/linux_aio.c
index a7240ac..e2b12f0 100644
--- a/linux_aio.c
+++ b/linux_aio.c
@@ -16,7 +16,7 @@ void init(__attribute__((unused)) int fd) {
 	}
 }
 
-void do_read(int fd, const void *buf, size_t count) {
+unsigned do_read(int fd, const void *buf, size_t count) {
 	struct io_event ev;
 	struct iocb io;
 	struct iocb *ioq[] = {&io};
@@ -37,4 +37,5 @@ void do_read(int fd, const void *buf, size_t count) {
 	if (w_res < 0) err(EXIT_FAILURE, "io_getevents failed");
 
 	if (ev.res2 != 0) err(EXIT_FAILURE, "read request failed");
+	return 1;
 }
diff --git a/paio_sig.c b/paio_sig.c
index d545f20..1dc578c 100644
--- a/paio_sig.c
+++ b/paio_sig.c
@@ -31,7 +31,7 @@ void init(int fd) {
 	if (sigaction(SIGUSR1, &sa, NULL) == -1) err(EXIT_FAILURE, "sigaction failed");
 }
 
-void do_read(int fd, void *buf, size_t count) {
+unsigned do_read(int fd, void *buf, size_t count) {
 	aiocb.aio_buf = buf;
 	aiocb.aio_nbytes = count;
 
@@ -44,4 +44,5 @@ void do_read(int fd, void *buf, size_t count) {
 	}
 	stop_watch();
 	atomic_store(&done, 1);
+	return 1;
 }
diff --git a/paio_thrd.c b/paio_thrd.c
index e04201f..ed69f11 100644
--- a/paio_thrd.c
+++ b/paio_thrd.c
@@ -29,7 +29,7 @@ void init(int fd) {
 	aiocb.aio_sigevent.sigev_value.sival_int = 42;
 }
 
-void do_read(int fd, void *buf, size_t count) {
+unsigned do_read(int fd, void *buf, size_t count) {
 	aiocb.aio_buf = buf;
 	aiocb.aio_nbytes = count;
 
@@ -41,4 +41,5 @@ void do_read(int fd, void *buf, size_t count) {
 	atomic_store(&done, 0);
 
 	if (res == -1) err(EXIT_FAILURE, "aio_read failed");
+	return 1;
 }
-- 
GitLab