diff --git a/Makefile b/Makefile index 9768467ebc079a3d9844a70c8e0fea30e2c1c85d..6942f4a61d9cd502c1f142b8d29c1296fa7b1b31 100644 --- a/Makefile +++ b/Makefile @@ -3,23 +3,35 @@ SYSCALLS := blocking io-uring io-uring-sqpoll io-uring-no-syscall epoll paio-sig OBJ := $(addprefix bench-,$(SYSCALLS)) -LDFLAGS := -luring -pthread -lrt +LDFLAGS := -luring -pthread -lrt -lm -CFLAGS := -Werror -Wall -g -O3 -D_GNU_SOURCE +CFLAGS := -Werror -Wall -g -O3 # CFLAGS := -Werror -Wall -g -O0 -.PHONY: all clean eval docker-eval check +.PHONY: all clean eval stats dataref docker-eval docker-stats docker-dataref check eval: all - @for syscall in $(SYSCALLS); do echo -n "$$syscall " ; ./bench-$$syscall; done + @for syscall in $(SYSCALLS); do echo -n "$$syscall "; ./bench-$$syscall; done + +stats: all + @for syscall in $(SYSCALLS); do echo "$$syscall:"; ./bench-$$syscall --stats; done + +dataref: all + @$(MAKE) stats | tools/yaml2dataref.sh docker-eval: ./docker.sh make eval +docker-stats: + ./docker.sh make stats + +docker-dataref: + ./docker.sh make dataref + all: $(OBJ) define generateTargets -bench-$(1): $(1).c bench.c stopwatch.c | Makefile +bench-$(1): $(1).c bench.c stopwatch.c stats.c | Makefile $(CC) $(CFLAGS) -o $$@ $$^ $(LDFLAGS) endef diff --git a/bench.c b/bench.c index 2a2494d0eb3b09a3172466273e76e0071aea89d0..353ddd67df686ec9f2b248cc15b2e0ad22308bae 100644 --- a/bench.c +++ b/bench.c @@ -1,12 +1,13 @@ #include #include -#include +#include #include #include #include +#include #include -#include +#include "stats.h" #include "stopwatch.h" void init(int fd); @@ -25,46 +26,43 @@ static int create_eventfd() { return fd; } -static void set_cpu_affinity(int cpu) { - cpu_set_t set; - // NOLINTNEXTLINE(clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling) - CPU_ZERO(&set); - CPU_SET(cpu, &set); - if (sched_setaffinity(getpid(), sizeof(set), &set) == -1) - err(EXIT_FAILURE, "sched_setaffinity failed"); -} +int main(int argc, char *argv[]) { + bool print_stats =
false; + if (argc > 2 || (argc == 2 && !(print_stats = (strcmp(argv[1], "--stats") == 0)))) + errx(EXIT_FAILURE, "Usage: %s [--stats]", argv[0]); // a malformed command line is an error, not a success -int main() { - uint64_t cycles_sum = 0; - uint64_t nanos_sum = 0; uint64_t write_buf = 1; - set_cpu_affinity(0); - int fd = create_eventfd(); init(fd); const size_t exp_warmup = warmup; const size_t exp_iterations = iterations; + uint64_t *nanos = malloc(exp_iterations * sizeof(uint64_t)); + uint64_t *cycles = malloc(exp_iterations * sizeof(uint64_t)); + if (!nanos || !cycles) err(EXIT_FAILURE, "allocating memory for our results failed"); + if (exp_iterations == 0) errx(EXIT_FAILURE, "experiment must do at least one iteration"); for (size_t i = 0; i < exp_warmup; ++i) do_write(fd, &write_buf, sizeof(write_buf)); for (int64_t i = 1; i <= exp_iterations; ++i) { do_write(fd, &write_buf, sizeof(write_buf)); - if (__builtin_add_overflow(nanos_sum, clock_diff_nanos(), &nanos_sum)) - errx(EXIT_FAILURE, "nanos overflowed at %ld", i); - if (__builtin_add_overflow(cycles_sum, clock_diff_cycles(), &cycles_sum)) - errx(EXIT_FAILURE, "cycles overflowed at %ld", i); + nanos[i - 1] = clock_diff_nanos(); + cycles[i - 1] = clock_diff_cycles(); } - // Since uint64_t <-> double conversion are not well defined - // and we use really small units (cycles and nanoseconds) I am willing - // to accept that we throw away anything after the decimal point.
- uint64_t avg_nanos = nanos_sum / exp_iterations; - uint64_t avg_cycles = cycles_sum / exp_iterations; + if (print_stats) { + print_desc_stats("nanos", " ", nanos, exp_iterations); + print_desc_stats("cycles", " ", cycles, exp_iterations); + } else { + uint64_t avg_nanos = calc_mean(nanos, exp_iterations); + uint64_t avg_cycles = calc_mean(cycles, exp_iterations); + printf("avg-nanos: %lu, avg-cycles: %lu\n", avg_nanos, avg_cycles); + } - printf("%lu ns, %lu cycles\n", avg_nanos, avg_cycles); + free(nanos); + free(cycles); return 0; } diff --git a/common.h b/common.h index 80360f2f320eafdde5385a3f1076f32e18df250d..ade483ffa8685629ad61c97827b4e0c4b7d14011 100644 --- a/common.h +++ b/common.h @@ -1,5 +1,6 @@ #pragma once +#define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) extern size_t warmup, iterations; diff --git a/io-uring-no-syscall.c b/io-uring-no-syscall.c index 62f7c5a257cce2b6fdc146660b8968082736c798..9b0b0147c40daebf1dbf040fe027ca7ee10487ec 100644 --- a/io-uring-no-syscall.c +++ b/io-uring-no-syscall.c @@ -4,26 +4,18 @@ #include #include "common.h" +#include "io_uring.h" #include "stopwatch.h" struct io_uring ring; -void init(__attribute__((unused)) int fd) { - int res = io_uring_queue_init(16, &ring, IORING_SETUP_SQPOLL); - if (res < 0) { - errno = res; - err(EXIT_FAILURE, "io_uring_setup failed"); - } -} +void init(__attribute__((unused)) int fd) { io_uring_init_sqpoll(&ring); } void do_write(int fd, const void *buf, size_t count) { - start_watch(); - struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); - while (unlikely(!sqe)) { - sqe = io_uring_get_sqe(&ring); - } + struct io_uring_sqe *sqe = sqpoll_ring_get_sqe(&ring); io_uring_prep_write(sqe, fd, buf, count, 0); + start_watch(); int res = io_uring_submit(&ring); struct io_uring_cqe *cqe; @@ -32,11 +24,7 @@ void do_write(int fd, const void *buf, size_t count) { stop_watch(); - if (res < 0) { - err(EXIT_FAILURE, "io_submit failed"); - } + if (res < 0)
err(EXIT_FAILURE, "io_submit failed"); - if (cqe->res < 0) { - err(EXIT_FAILURE, "write request failed"); - } + if (cqe->res < 0) err(EXIT_FAILURE, "write request failed"); } diff --git a/io-uring-sqpoll.c b/io-uring-sqpoll.c index 349f71ff4266e9469b7cc1b5773904524ed6c4a0..6fc2a9bcf010b709ceb77a1b7ad5436b6b1f67b2 100644 --- a/io-uring-sqpoll.c +++ b/io-uring-sqpoll.c @@ -4,29 +4,19 @@ #include #include "common.h" +#include "io_uring.h" #include "stopwatch.h" struct io_uring ring; -void init(__attribute__((unused)) int fd) { - int res = io_uring_queue_init(16, &ring, IORING_SETUP_SQPOLL); - if (res < 0) { - errno = res; - err(EXIT_FAILURE, "io_uring_setup failed"); - } -} +void init(__attribute__((unused)) int fd) { io_uring_init_sqpoll(&ring); } void do_write(int fd, const void *buf, size_t count) { - start_watch(); - - struct io_uring_sqe *sqe = io_uring_get_sqe(&ring); - while (unlikely(!sqe)) { - sqe = io_uring_get_sqe(&ring); - } + struct io_uring_sqe *sqe = sqpoll_ring_get_sqe(&ring); io_uring_prep_write(sqe, fd, buf, count, 0); + start_watch(); int res = io_uring_submit_and_wait(&ring, 1); - stop_watch(); if (res < 0) { @@ -35,11 +25,8 @@ void do_write(int fd, const void *buf, size_t count) { struct io_uring_cqe *cqe; res = io_uring_peek_cqe(&ring, &cqe); - if (res < 0) { - err(EXIT_FAILURE, "io_uring_peek_cqe failed"); - } - if (cqe->res < 0) { - err(EXIT_FAILURE, "write request failed"); - } + if (res < 0) err(EXIT_FAILURE, "io_uring_peek_cqe failed"); + + if (cqe->res < 0) err(EXIT_FAILURE, "write request failed"); } diff --git a/io_uring.h b/io_uring.h new file mode 100644 index 0000000000000000000000000000000000000000..961ef308661c59069ff991f29d00a896ee9ebb17 --- /dev/null +++ b/io_uring.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +#include "common.h" + +#define GET_SQE_ATTEMPTS 1000000 +static inline struct io_uring_sqe* sqpoll_ring_get_sqe(struct io_uring* ring) { + struct io_uring_sqe* sqe; + size_t attempts = 0; + for (;;) { + sqe =
io_uring_get_sqe(ring); + if (likely(sqe)) return sqe; + + ++attempts; + + if (attempts < GET_SQE_ATTEMPTS) continue; + + errx(EXIT_FAILURE, "failed to get sqe after %d", GET_SQE_ATTEMPTS); + } +} + +#define SQPOLL_RING_ENTRIES 16 + +static inline void io_uring_init_sqpoll(struct io_uring* ring) { + int res = io_uring_queue_init(SQPOLL_RING_ENTRIES, ring, IORING_SETUP_SQPOLL); + if (res < 0) { + errno = -res; // liburing returns -errno; err() expects the positive errno value + err(EXIT_FAILURE, "io_uring_setup failed"); + } +} diff --git a/stats.c b/stats.c new file mode 100644 index 0000000000000000000000000000000000000000..70a581f1b0b409d25595c786b1f53e50ab49c252 --- /dev/null +++ b/stats.c @@ -0,0 +1,72 @@ +#include "stats.h" + +#include +#include +#include +#include + +uint64_t calc_median(const uint64_t* data, size_t size) { + // sample size is odd -> there is a middle value (data must be sorted ascending) + if (size % 2 == 1) return data[size / 2]; + + // sample size is even -> use the midpoint of the two middle values + const uint64_t before_median = data[(size / 2) - 1]; + const uint64_t after_median = data[size / 2]; + return before_median + ((after_median - before_median) / 2); +} + +static void check_sample_size(size_t size) { + if (size == 0) errx(EXIT_FAILURE, "data size must not be zero"); +} + +uint64_t calc_mean(const uint64_t* data, size_t size) { + check_sample_size(size); + uint64_t sum = 0; + for (size_t i = 0; i < size; ++i) { + if (__builtin_add_overflow(sum, data[i], &sum)) errx(EXIT_FAILURE, "sum overflowed at %zu", i); + } + return sum / size; +} + +uint64_t calc_var(const uint64_t* data, size_t size, uint64_t mean) { + check_sample_size(size); + uint64_t sum = 0; + for (size_t i = 0; i < size; ++i) { + const uint64_t delta = data[i] > mean ? data[i] - mean : mean - data[i]; // absolute difference: unsigned subtraction must not wrap + uint64_t delta_pow; + if (__builtin_mul_overflow(delta, delta, &delta_pow) || __builtin_add_overflow(sum, delta_pow, &sum)) + errx(EXIT_FAILURE, "sum overflowed at %zu", i); + } + return sum / size; +} + +static int compare_uint64_t(const void* v1, const void* v2) { + const uint64_t x1 = *(const uint64_t*)v1; + const uint64_t x2 =
*(const uint64_t*)v2; + if (x1 < x2) return -1; + + if (x1 > x2) return 1; + + return 0; +} + +void print_desc_stats(const char* name, const char* indentation, const uint64_t* data, + size_t size) { + // sort in place; NOTE: this reorders the caller's array despite the const qualifier + qsort((void*)data, size, sizeof(uint64_t), compare_uint64_t); + check_sample_size(size); // data[size - 1] below requires a non-empty sample + const uint64_t min = data[0]; + const uint64_t max = data[size - 1]; + const uint64_t median = calc_median(data, size); + const uint64_t mean = calc_mean(data, size); + const uint64_t var = calc_var(data, size, mean); + const uint64_t std = (uint64_t)sqrt((double)var); + + printf("%s%s:\n", indentation, name); + printf("%s%smin: %lu\n", indentation, indentation, min); + printf("%s%smax: %lu\n", indentation, indentation, max); + printf("%s%smedian: %lu\n", indentation, indentation, median); + printf("%s%smean: %lu\n", indentation, indentation, mean); + printf("%s%svar: %lu\n", indentation, indentation, var); + printf("%s%sstd: %lu\n", indentation, indentation, std); +} diff --git a/stats.h b/stats.h new file mode 100644 index 0000000000000000000000000000000000000000..8c210ce209c09579b82e5bb9b0147fe3c0cb1f96 --- /dev/null +++ b/stats.h @@ -0,0 +1,9 @@ +#pragma once + +#include <stdint.h> +#include <stdlib.h> + +uint64_t calc_mean(const uint64_t* data, size_t size); +uint64_t calc_median(const uint64_t* data, size_t size); +uint64_t calc_var(const uint64_t* data, size_t size, uint64_t mean); +void print_desc_stats(const char* name, const char* indentation, const uint64_t* data, size_t size); \ No newline at end of file diff --git a/tools/yaml2dataref.sh b/tools/yaml2dataref.sh new file mode 100755 index 0000000000000000000000000000000000000000..44de4996054609bb122233593701df4f5b5c6854 --- /dev/null +++ b/tools/yaml2dataref.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# taken from here: +# https://stackoverflow.com/questions/5014632/how-can-i-parse-a-yaml-file-from-a-linux-shell-script +s='[[:space:]]*' +w='[a-zA-Z0-9_]*' +fs=$(echo @|tr @ '\034') +sed -ne "s|^\($s\):|\1|" \ + -e
"s|^\($s\)\($w\)$s:${s}[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p" \ + -e "s|^\($s\)\($w\)$s:$s\(.*\)$s\$|\1$fs\2$fs\3|p" | +awk -F"$fs" '{ + indent = length($1)/2; + vname[indent] = $2; + for (i in vname) {if (i > indent) {delete vname[i]}} + if (length($3) > 0) { + vn=""; for (i=0; i