diff --git a/.gitignore b/.gitignore index 1f7502762227d2ef8dd99f2a6554b440137264ef..cd1f818d5e27a9c5a8baf9c943b4e621a33493d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ +boxplots.* bench-* .cache/ diff --git a/Makefile b/Makefile index 67b2ba482578cb6c22491080558c4df5961696ff..8e77a20fac9ea7d5dbe3018f8c70c358742d6515 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,9 @@ eval: all stats: all @for syscall in $(SYSCALLS); do echo "$$syscall:"; ./bench-$$syscall --stats; done +data: all + @for syscall in $(SYSCALLS); do echo "$$syscall"; ./bench-$$syscall --print-data; done + dataref: @$(MAKE) stats | tools/yaml2dataref.sh @@ -25,6 +28,9 @@ docker-eval: docker-stats: ./docker.sh make stats +docker-data: + ./docker.sh make data + docker-dataref: ./docker.sh make dataref diff --git a/bench.c b/bench.c index cff9124d1c8cd07dace926278891bbad055a25c1..cce0ddb0ec7084dae1db87103f80cab28f62e56f 100644 --- a/bench.c +++ b/bench.c @@ -28,9 +28,19 @@ static int create_eventfd() { } int main(int argc, char *argv[]) { + bool print_data = false; bool print_stats = false; - if (argc > 2 || (argc == 2 && !(print_stats = (strcmp(argv[1], "--stats") == 0)))) - errx(EXIT_SUCCESS, "Usage: %s [--stats]", argv[0]); + if (argc > 2) + errx(EXIT_SUCCESS, "Usage: %s [--print-data]", argv[0]); + + if (argc == 2) { + if ((strcmp(argv[1], "--print-data") == 0)) + print_data = true; + else if ((strcmp(argv[1], "--stats") == 0)) + print_stats = true; + else + errx(EXIT_SUCCESS, "Usage: %s [--print-data]", argv[0]); + } uint64_t read_buf = 1; @@ -54,7 +64,10 @@ int main(int argc, char *argv[]) { cycles[i - 1] = clock_diff_cycles(); } - if (print_stats) { + if (print_data) { + for (unsigned i = 0; i < exp_iterations; ++i) + printf("%lu,%lu\n", nanos[i], cycles[i]); + } else if (print_stats) { print_desc_stats("nanos", " ", nanos, exp_iterations); print_desc_stats("cycles", " ", cycles, exp_iterations); } else { diff --git a/calc_stats.py b/calc_stats.py new file mode 100755 index 0000000000000000000000000000000000000000..7d6ddb1e1b733a10878f09b45ccfc8dc8d734734 --- /dev/null +++ b/calc_stats.py @@ -0,0 +1,104 @@ +#!/usr/bin/python3 + +from pathlib import Path +import sys +import typing as T + +import numpy +import yaml + +def print_usage_and_exit(): + print(f'Usage: {sys.argv[0]} [csv-file | data-dir]') + sys.exit(0) + +Measurements = T.Mapping[str, T.Sequence[int]] +Data = T.Mapping[str, Measurements] +def read_csv(stream) -> Data: + data = {} + all_nanos, all_cycles = [], [] + lines = stream.read().splitlines() + syscall = lines[0] + for line in lines[1:]: + if not ',' in line: + # store data collected from previous syscall + data[syscall] = {'nanos': all_nanos, 'cycles': all_cycles} + all_nanos, all_cycles = [], [] + syscall = line + continue + + nanos, cycles = line.split(',') + nanos, cycles = int(nanos), int(cycles) + all_nanos.append(nanos) + all_cycles.append(cycles) + + return data + +SyscallStats = T.Mapping[str, float] +Stats = T.Mapping[str, SyscallStats] +def calc_stats(data: Data) -> Stats: + stats = {} + for syscall, measurements in data.items(): + syscall_stats = {} + stats[syscall] = syscall_stats + + for measure, values in measurements.items(): + measure_stats = {} + syscall_stats[measure] = measure_stats + + + measure_stats['mean'] = numpy.mean(values) + measure_stats['std'] = numpy.std(values) + + values.sort() + measure_stats['min'] = values[0] + measure_stats['may'] = values[-1] + measure_stats['median'] = numpy.median(values) + upper_quartile = numpy.percentile(values, 75) + measure_stats['upper_quartile'] = upper_quartile + + lower_quartile = numpy.percentile(values, 25) + measure_stats['lower_quartile'] = lower_quartile + iqr = upper_quartile - lower_quartile + + # find whiskers + i = 0 + while values[i] < lower_quartile - 1.5 * iqr: + i += 1 + measure_stats['lower_whisker'] = values[i] + # measure_stats['outliers'] = values[:i] + + i = len(values) - 1 + while values[i] > upper_quartile + 1.5 * iqr: + i -= 1 + measure_stats['upper_whisker'] = values[i] + # measure_stats['outliers'] += values[i + 1:] + + # convert everything to float to easily dump it using pyyaml + for k, v in measure_stats.items(): + if type(v) is list: + continue + measure_stats[k] = float(v) + + return stats + + +def main(): + if len(sys.argv) == 1: + data = read_csv(sys.stdin) + elif len(sys.argv) > 2: + print_usage_and_exit() + + data_path = Path(sys.argv[1]) + if not data_path.exists(): + print('Path: {data_path} does not exists') + sys.exit(1) + + if data_path.is_file(): + with open(data_path, 'r') as data_file: + data = read_csv(data_file) + + stats = calc_stats(data) + print(yaml.safe_dump(stats)) + +if __name__ == '__main__': + main()