diff --git a/.gitignore b/.gitignore
index 1f7502762227d2ef8dd99f2a6554b440137264ef..cd1f818d5e27a9c5a8baf9c943b4e621a33493d2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
+boxplots.*
 bench-*
 .cache/
diff --git a/Makefile b/Makefile
index 67b2ba482578cb6c22491080558c4df5961696ff..8e77a20fac9ea7d5dbe3018f8c70c358742d6515 100644
--- a/Makefile
+++ b/Makefile
@@ -16,6 +16,9 @@ eval: all
 stats: all
 	@for syscall in $(SYSCALLS); do echo "$$syscall:"; ./bench-$$syscall --stats; done
 
+data: all
+	@for syscall in $(SYSCALLS); do echo "$$syscall"; ./bench-$$syscall --print-data; done
+
 dataref:
 	@$(MAKE) stats | tools/yaml2dataref.sh
 
@@ -25,6 +28,9 @@ docker-eval:
 docker-stats:
 	./docker.sh make stats
 
+docker-data:
+	./docker.sh make data
+
 docker-dataref:
 	./docker.sh make dataref
 
diff --git a/bench.c b/bench.c
index cff9124d1c8cd07dace926278891bbad055a25c1..cce0ddb0ec7084dae1db87103f80cab28f62e56f 100644
--- a/bench.c
+++ b/bench.c
@@ -28,9 +28,19 @@ static int create_eventfd() {
 }
 
 int main(int argc, char *argv[]) {
+	bool print_data = false;
 	bool print_stats = false;
-	if (argc > 2 || (argc == 2 && !(print_stats = (strcmp(argv[1], "--stats") == 0))))
-		errx(EXIT_SUCCESS, "Usage: %s [--stats]", argv[0]);
+	if (argc > 2)
+		errx(EXIT_SUCCESS, "Usage: %s [--print-data]", argv[0]);
+
+	if (argc == 2) {
+		if ((strcmp(argv[1], "--print-data") == 0))
+			print_data = true;
+		else if ((strcmp(argv[1], "--stats") == 0))
+			print_stats = true;
+		else
+			errx(EXIT_SUCCESS, "Usage: %s [--print-data]", argv[0]);
+	}
 
 	uint64_t read_buf = 1;
 
@@ -54,7 +64,10 @@ int main(int argc, char *argv[]) {
 		cycles[i - 1] = clock_diff_cycles();
 	}
 
-	if (print_stats) {
+	if (print_data) {
+		for (unsigned i = 0; i < exp_iterations; ++i)
+			printf("%lu,%lu\n", nanos[i], cycles[i]);
+	} else if (print_stats) {
 		print_desc_stats("nanos", "  ", nanos, exp_iterations);
 		print_desc_stats("cycles", "  ", cycles, exp_iterations);
 	} else {
diff --git a/calc_stats.py b/calc_stats.py
new file mode 100755
index 0000000000000000000000000000000000000000..7d6ddb1e1b733a10878f09b45ccfc8dc8d734734
--- /dev/null
+++ b/calc_stats.py
@@ -0,0 +1,104 @@
+#!/usr/bin/python3
+
+from pathlib import Path
+import sys
+import typing as T
+
+import numpy
+import yaml
+
+def print_usage_and_exit():
+    print(f'Usage: {sys.argv[0]} [csv-file | data-dir]')
+    sys.exit(0)
+
+Measurements = T.Mapping[str, T.Sequence[int]]
+Data = T.Mapping[str, Measurements]
+def read_csv(stream) -> Data:
+    data = {}
+    all_nanos, all_cycles = [], []
+    lines = stream.read().splitlines()
+    syscall = lines[0]
+    for line in lines[1:]:
+        if not ',' in line:
+            # store data collected from previous syscall
+            data[syscall] = {'nanos': all_nanos, 'cycles': all_cycles}
+            all_nanos, all_cycles = [], []
+            syscall = line
+            continue
+
+        nanos, cycles = line.split(',')
+        nanos, cycles = int(nanos), int(cycles)
+        all_nanos.append(nanos)
+        all_cycles.append(cycles)
+
+    return data
+
+SyscallStats = T.Mapping[str, float]
+Stats = T.Mapping[str, SyscallStats]
+def calc_stats(data: Data) -> Stats:
+    stats = {}
+    for syscall, measurements in data.items():
+        syscall_stats = {}
+        stats[syscall] = syscall_stats
+
+        for measure, values in measurements.items():
+            measure_stats = {}
+            syscall_stats[measure] = measure_stats
+
+
+            measure_stats['mean'] = numpy.mean(values)
+            measure_stats['std'] = numpy.std(values)
+
+            values.sort()
+            measure_stats['min'] = values[0]
+            measure_stats['may'] = values[-1]
+            measure_stats['median'] = numpy.median(values)
+            upper_quartile = numpy.percentile(values, 75)
+            measure_stats['upper_quartile'] = upper_quartile
+
+            lower_quartile = numpy.percentile(values, 25)
+            measure_stats['lower_quartile'] = lower_quartile
+            iqr = upper_quartile - lower_quartile
+
+            # find whiskers
+            i = 0
+            while values[i] < lower_quartile - 1.5 * iqr:
+                i += 1
+            measure_stats['lower_whisker'] = values[i]
+            # measure_stats['outliers'] = values[:i]
+
+            i = len(values) - 1
+            while values[i] > upper_quartile + 1.5 * iqr:
+                i -= 1
+            measure_stats['upper_whisker'] = values[i]
+            # measure_stats['outliers'] += values[i + 1:]
+
+            # convert everything to float to easily dump it using pyyaml
+            for k, v in measure_stats.items():
+                if type(v) is list:
+                    continue
+                measure_stats[k] = float(v)
+
+    return stats
+
+
+def main():
+    if len(sys.argv) == 1:
+        data = read_csv(sys.stdin)
+    elif len(sys.argv) > 2:
+        print_usage_and_exit()
+
+    data_path = Path(sys.argv[1])
+    if not data_path.exists():
+        print('Path: {data_path} does not exists')
+        sys.exit(1)
+    
+    if data_path.is_file():
+        with open(data_path, 'r') as data_file:
+            data = read_csv(data_file)
+
+    stats = calc_stats(data)
+    print(yaml.safe_dump(stats))
+
+if __name__ == '__main__':
+    main()