Skip to content
Snippets Groups Projects
Commit aa6b493a authored by Florian Fischer's avatar Florian Fischer
Browse files

add descriptive statistic calculation in python

parent 6b94c657
No related branches found
No related tags found
No related merge requests found
Pipeline #66631 failed
boxplots.*
bench-*
.cache/
......@@ -16,6 +16,9 @@ eval: all
stats: all
@for syscall in $(SYSCALLS); do echo "$$syscall:"; ./bench-$$syscall --stats; done
data: all
@for syscall in $(SYSCALLS); do echo "$$syscall"; ./bench-$$syscall --print-data; done
dataref:
@$(MAKE) stats | tools/yaml2dataref.sh
......@@ -25,6 +28,9 @@ docker-eval:
docker-stats:
./docker.sh make stats
docker-data:
./docker.sh make data
docker-dataref:
./docker.sh make dataref
......
......@@ -28,9 +28,19 @@ static int create_eventfd() {
}
int main(int argc, char *argv[]) {
bool print_data = false;
bool print_stats = false;
if (argc > 2 || (argc == 2 && !(print_stats = (strcmp(argv[1], "--stats") == 0))))
errx(EXIT_SUCCESS, "Usage: %s [--stats]", argv[0]);
if (argc > 2)
errx(EXIT_SUCCESS, "Usage: %s [--print-data]", argv[0]);
if (argc == 2) {
if ((strcmp(argv[1], "--print-data") == 0))
print_data = true;
else if ((strcmp(argv[1], "--stats") == 0))
print_stats = true;
else
errx(EXIT_SUCCESS, "Usage: %s [--print-data]", argv[0]);
}
uint64_t read_buf = 1;
......@@ -54,7 +64,10 @@ int main(int argc, char *argv[]) {
cycles[i - 1] = clock_diff_cycles();
}
if (print_stats) {
if (print_data) {
for (unsigned i = 0; i < exp_iterations; ++i)
printf("%lu,%lu\n", nanos[i], cycles[i]);
} else if (print_stats) {
print_desc_stats("nanos", " ", nanos, exp_iterations);
print_desc_stats("cycles", " ", cycles, exp_iterations);
} else {
......
#!/usr/bin/python3
from pathlib import Path
import sys
import typing as T
import numpy
import yaml
def print_usage_and_exit():
    """Write the command line synopsis to stdout and terminate with status 0."""
    usage = f'Usage: {sys.argv[0]} [csv-file | data-dir]'
    print(usage)
    # sys.exit(0) is defined as raising SystemExit(0); spell it out directly.
    raise SystemExit(0)
# One syscall's raw samples, keyed by unit ('nanos' / 'cycles').
Measurements = T.Mapping[str, T.Sequence[int]]
# Raw samples of every syscall, keyed by syscall name.
Data = T.Mapping[str, Measurements]


def read_csv(stream) -> Data:
    """Parse concatenated per-syscall measurements from *stream*.

    The input consists of sections.  Each section starts with a header line
    holding the syscall name (any line without a comma) and is followed by
    ``nanos,cycles`` integer pairs, one pair per line.  Blank lines are
    ignored.

    Args:
        stream: Readable text stream; only its ``read()`` method is used.

    Returns:
        Mapping of syscall name to ``{'nanos': [...], 'cycles': [...]}``.
        Empty input yields an empty mapping.  If a syscall name occurs more
        than once, the last section wins.

    Raises:
        ValueError: If a data line appears before any syscall name, or if a
            data line is not exactly two comma separated integers.
    """
    data = {}
    current = None
    for line in stream.read().splitlines():
        if not line.strip():
            continue
        if ',' not in line:
            # Header line: register the section right away so that the last
            # section of the input is not lost when the stream ends.
            current = {'nanos': [], 'cycles': []}
            data[line] = current
            continue
        if current is None:
            raise ValueError(
                f'measurement {line!r} found before any syscall name')
        nanos, cycles = line.split(',')
        current['nanos'].append(int(nanos))
        current['cycles'].append(int(cycles))
    return data
# Statistics of one syscall: measure name ('nanos' / 'cycles') mapped to
# its descriptive statistics (statistic name -> value).
SyscallStats = T.Mapping[str, T.Mapping[str, float]]
# Statistics of every syscall, keyed by syscall name.
Stats = T.Mapping[str, SyscallStats]


def _describe(values: T.Sequence[int]) -> T.Dict[str, float]:
    """Return the box-plot statistics of *values* as plain Python floats.

    An empty series has no meaningful statistics and yields an empty dict.
    """
    # Work on a sorted copy so the caller's sample list is left untouched.
    ordered = sorted(values)
    if not ordered:
        return {}

    upper_quartile = numpy.percentile(ordered, 75)
    lower_quartile = numpy.percentile(ordered, 25)
    iqr = upper_quartile - lower_quartile

    # Whiskers are the outermost samples still within 1.5 * IQR of the
    # quartiles.  A match always exists because min <= Q1 and max >= Q3.
    lower_fence = lower_quartile - 1.5 * iqr
    upper_fence = upper_quartile + 1.5 * iqr
    lower_whisker = next(v for v in ordered if v >= lower_fence)
    upper_whisker = next(v for v in reversed(ordered) if v <= upper_fence)

    # Everything is converted to float so pyyaml's safe dumper, which does
    # not know numpy scalar types, can serialize the result.
    return {
        'mean': float(numpy.mean(values)),
        # numpy.std defaults to the population standard deviation (ddof=0).
        'std': float(numpy.std(values)),
        'min': float(ordered[0]),
        'max': float(ordered[-1]),
        'median': float(numpy.median(ordered)),
        'upper_quartile': float(upper_quartile),
        'lower_quartile': float(lower_quartile),
        'lower_whisker': float(lower_whisker),
        'upper_whisker': float(upper_whisker),
    }


def calc_stats(data: 'Data') -> Stats:
    """Calculate descriptive statistics for every measure of every syscall.

    Args:
        data: Mapping of syscall name to a mapping of measure name to the
            sequence of integer samples.  The samples are not modified.

    Returns:
        ``stats[syscall][measure]`` is a dict with the float entries
        ``mean``, ``std``, ``min``, ``max``, ``median``, ``upper_quartile``,
        ``lower_quartile``, ``lower_whisker`` and ``upper_whisker``; it is
        empty when the measure has no samples.
    """
    return {
        syscall: {
            measure: _describe(values)
            for measure, values in measurements.items()
        }
        for syscall, measurements in data.items()
    }
def main():
    """Read measurements, calculate their statistics and dump them as YAML.

    Without an argument the measurements are read from stdin; with one
    argument they are read from the file at that path.  More than one
    argument prints the usage and exits.

    Exits with status 1, after printing a message to stderr, if the given
    path does not exist or is not a regular file.
    """
    if len(sys.argv) > 2:
        print_usage_and_exit()

    if len(sys.argv) == 1:
        data = read_csv(sys.stdin)
    else:
        data_path = Path(sys.argv[1])
        # Diagnostics go to stderr so stdout only ever carries the YAML.
        if not data_path.exists():
            print(f'Path: {data_path} does not exist', file=sys.stderr)
            sys.exit(1)
        if not data_path.is_file():
            # NOTE(review): the usage text advertises a data directory, but
            # no directory layout is defined anywhere in this script, so
            # fail loudly instead of continuing with unbound data.
            print(f'Path: {data_path} is not a regular file',
                  file=sys.stderr)
            sys.exit(1)
        with open(data_path, 'r') as data_file:
            data = read_csv(data_file)

    stats = calc_stats(data)
    print(yaml.safe_dump(stats))


if __name__ == '__main__':
    main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment