Select Git revision
parse_results.py 8.20 KiB
#!/usr/bin/env python3
import argparse
from configparser import ConfigParser
import csv
import fnmatch
import io
import re
from pathlib import Path
from typing import Dict, Iterable, List, Mapping, Optional, Sequence, Union
import numpy as np
Number = Union[int, float]


def to_number(string: str) -> Number:
    """Parse *string* into an ``int`` or a ``float``.

    Plain integer literals are parsed exactly with ``int``.  Everything else
    is parsed with ``float`` so that scientific notation is accepted; such a
    value is turned back into an ``int`` only when the text contains no
    ``'.'`` and the value is integral (``'1e3'`` -> ``1000``).

    Args:
        string: textual representation of a number.

    Returns:
        The parsed number.

    Raises:
        ValueError: if *string* is not a valid number.
    """
    try:
        # Exact for arbitrarily large integers; a detour through float would
        # lose precision above 2**53.
        return int(string)
    except ValueError:
        pass

    # we convert to float so python parses scientific notation
    f = float(string)
    # Non-integral values without a dot (e.g. '1e-3') must stay floats,
    # int() would silently truncate them.  inf and nan are never integral,
    # so they are returned as floats instead of making int() raise.
    if '.' in string or not f.is_integer():
        return f
    return int(f)
# Name of an implementation under test; collect_results uses the name of the
# implementation's result sub-directory.
Target = str
# Number of clients of an experiment, read from the 'clients' option of the
# [global] section of a result file.
Connections = int
# Run number, taken from the first numeric field of a result file name.
Run = int
# A single measured value or a list of values (e.g. 'client-iterations').
DataPoint = Union[List[Number], Number]
# Measurements of one experiment keyed by measurement name.
ExperimentData = Dict[str, DataPoint]
# Per-file results grouped by connection count and run, before summing.
ImplDataPerRun = Dict[Connections, Dict[Run, List[ExperimentData]]]
# Per connection count: one (summed) ExperimentData per run.
ImplData = Dict[Connections, List[ExperimentData]]
# Collected results of all implementations.
EvaluationResults = Dict[Target, ImplData]
def sum_data(data: Sequence[ExperimentData]) -> ExperimentData:
    """Sum up experiment results from multiple clients.

    Scalar values are added.  List values are added element-wise; pairs are
    formed with ``zip``, so the result is as long as the shorter list.  The
    duration entries are not summed: the values of the first entry are kept.

    The dictionaries in *data* are left untouched; a new one is returned.

    Args:
        data: non-empty sequence of experiment results.

    Returns:
        A new dictionary holding the accumulated values.

    Raises:
        IndexError: if *data* is empty.
        KeyError: if a later entry has a summed key the first entry lacks.
    """
    # do not sum up duration times
    duration_keys = ('connect_duration', 'echo_duration', 'total_duration')

    # Copy the first entry (lists included) so that accumulating into the
    # result never modifies the caller's data[0].
    res: ExperimentData = {
        k: list(v) if isinstance(v, list) else v
        for k, v in data[0].items()
    }

    for exp_data in data[1:]:
        for k, v in exp_data.items():
            if k in duration_keys:
                continue

            res_v = res[k]
            if isinstance(v, list):
                # narrows the type for the checker; both sides must be lists
                assert isinstance(res_v, list)
                res[k] = [a + b for a, b in zip(v, res_v)]
            else:
                assert not isinstance(res_v, list)
                res[k] = res_v + v

    return res
def _parse_experiment(config: ConfigParser) -> ExperimentData:
    """Extract the measurements stored in one parsed result file.

    Raises:
        KeyError: if a required section, option or the 'iterations' csv
            column is missing.
    """
    global_section = config['global']
    exp_data: ExperimentData = {
        k: to_number(global_section[k])
        for k in ('connect_duration', 'echo_duration', 'total_duration')
    }

    # The first character of the csv value is dropped before parsing
    # (presumably the newline that starts the multi-line value -- TODO
    # confirm against the writer of the result files).
    csvfile = io.StringIO(config['clients']['csv'][1:])
    # convert every csv field to a number
    csv_data = [{k: to_number(v) for k, v in row.items()}
                for row in csv.DictReader(csvfile)]

    client_iterations = [c['iterations'] for c in csv_data]
    exp_data['client-iterations'] = client_iterations
    echos = sum(client_iterations)
    exp_data['total_iterations'] = echos
    # We know that echo_duration is a Number so ignore mypy.
    # NOTE(review): the 10**-9 factor suggests echo_duration is in
    # nanoseconds -- confirm.
    exp_data['iops'] = echos / (exp_data['echo_duration'] * 10**-9)  # type: ignore
    return exp_data


def collect_results(result_dir: Union[Path, str],
                    include: Optional[Sequence[str]] = None,
                    exclude: Optional[Sequence[str]] = None,
                    file_extension="ini",
                    print_experiments=False) -> EvaluationResults:
    """Collect the experiment results of all implementations.

    Every sub-directory of *result_dir* is treated as one implementation.
    Its result files are named ``<run>.<cons>.<size>.<file_extension>`` and
    are read with ``ConfigParser``.  All files belonging to the same client
    count and run are summed up with ``sum_data``.

    Files with an unexpected name or with missing sections/options are
    reported on stdout and skipped.

    Args:
        result_dir: directory containing one sub-directory per implementation.
        include: fnmatch patterns; if given, only matching implementations
            are collected.
        exclude: fnmatch patterns of implementations to leave out; applied
            after *include*.
        file_extension: extension of the result files (without the dot).
        print_experiments: print the parsed data of every result file.

    Returns:
        Per implementation and client count, one summed result per run.
        Implementations without any usable result file are omitted.
    """
    result_dir = Path(result_dir)
    impl_dirs = {d.name: d for d in result_dir.iterdir() if d.is_dir()}

    if include:
        selected = {}
        for pattern in include:
            for name in fnmatch.filter(impl_dirs, pattern):
                selected[name] = impl_dirs[name]
        impl_dirs = selected

    if exclude:
        excluded = set()
        for pattern in exclude:
            excluded.update(fnmatch.filter(impl_dirs, pattern))
        impl_dirs = {
            name: d for name, d in impl_dirs.items() if name not in excluded
        }

    # Matched against the file name only, so digits in directory names can
    # not be mistaken for the run number, and built from file_extension so
    # extensions other than "ini" work as well.
    data_name_re = re.compile(
        r'(?P<run>\d+)\.(?P<cons>\d+)\.(?P<size>\d+)\.'
        + re.escape(file_extension) + '$')

    data: EvaluationResults = {}
    for impl, impl_dir in impl_dirs.items():
        impl_d: ImplDataPerRun = {}

        # collect all data
        for data_path in impl_dir.glob(f'*.{file_extension}'):
            match = data_name_re.search(data_path.name)
            if match is None:
                print(f'ERROR in {data_path}: unexpected file name')
                continue
            run = int(match.group('run'))

            config = ConfigParser()
            config.read(data_path)
            try:
                cons = int(config['global']['clients'])
                exp_data = _parse_experiment(config)
            except KeyError:
                print(f'ERROR in {data_path}')
                continue

            impl_d.setdefault(cons, {}).setdefault(run, []).append(exp_data)

            if print_experiments:
                print(
                    f'{data_path}: clients: {config["global"]["clients"]} ',
                    end='')
                print(exp_data)

        # only add the implementations data if there is actual data
        if impl_d:
            # sum clients exp_data
            data[impl] = {
                cons: [sum_data(run_data) for run_data in cons_data.values()]
                for cons, cons_data in impl_d.items()
            }

    return data
# Statistic name ('mean', 'median', 'min', 'max', 'std') -> value.
Stats = Dict[str, float]
# Measurement name -> statistics of that measurement.
ConnectionStats = Dict[str, Stats]
# Connection count -> statistics of every measurement.
ImplStats = Dict[int, ConnectionStats]
# Implementation name -> statistics per connection count.
EvaluationStats = Dict[str, ImplStats]
def calculate_stats(data: EvaluationResults, warn=False) -> EvaluationStats:
    """Compute mean, median, min, max and std of every measurement.

    The statistics are calculated per implementation and connection count
    over all entries of that group.  Measurements whose values are lists
    are flattened into one list of samples first.  The measurement names
    are taken from the first entry of each group.

    Args:
        data: collected results per implementation and connection count.
        warn: print a warning for every measurement whose standard
            deviation exceeds 5% of its mean.

    Returns:
        The statistics, nested as implementation -> connection count ->
        measurement name -> statistic name.
    """
    result = {}
    for impl_name, per_cons in data.items():
        result[impl_name] = {}
        for n_cons, runs in per_cons.items():
            per_metric = {}
            for metric in runs[0]:
                samples = [run[metric] for run in runs]
                # list-valued measurements contribute each of their elements
                if isinstance(samples[0], list):
                    samples = [item for sample in samples for item in sample]  # type: ignore

                summary = {
                    'mean': np.mean(samples),
                    'median': np.median(samples),
                    'min': min(samples),
                    'max': max(samples),
                    'std': np.std(samples),
                }

                if warn:
                    relative_std = summary['std'] / summary['mean']
                    if relative_std > 0.05:
                        print('Warning: imprecise data impl:', end='')
                        print(
                            f' {impl_name}, c: {n_cons}, k: {metric} p: {relative_std*100:.5}%'
                        )

                per_metric[metric] = summary
            result[impl_name][n_cons] = per_metric
    return result
def print_stats_for_variable(variable: str, stats: Mapping):
    """Print all statistics of one variable on a single line.

    The line has the form ``<variable>: <name>: <value>, ... std[%]: <x>%``,
    where the last field is the standard deviation relative to the mean.

    Args:
        variable: label printed at the start of the line.
        stats: statistic name -> value; must contain 'std' and 'mean'.

    Raises:
        KeyError: if 'std' or 'mean' is missing from *stats*.
    """
    fields = ''.join(f'{name}: {value}, ' for name, value in stats.items())

    mean = stats["mean"]
    # With a zero mean the relative deviation is undefined: report nan
    # instead of failing with ZeroDivisionError on plain Python numbers.
    relative_std = (stats["std"] / mean) * 100 if mean else float('nan')

    print(f'{variable}: {fields}std[%]: {relative_std:.2f}%')
def print_stats(stats: EvaluationStats, variables: Optional[Iterable]):
    """Print the statistics of the selected variables.

    For every implementation the connection counts are printed in ascending
    order, each followed by one line per variable.

    Args:
        stats: statistics per implementation and connection count.
        variables: names of the variables to print.  If None or empty, all
            variables available for the respective connection count are
            printed.

    Raises:
        KeyError: if a requested variable has no statistics.
    """
    # Materialize once: a one-shot iterator would otherwise be exhausted
    # after the first connection count.
    requested = list(variables) if variables is not None else []

    for impl, impl_stats in stats.items():
        print(f'{impl}:')
        for cons in sorted(impl_stats):
            cons_stats = impl_stats[cons]
            print(f' {cons} connections:')
            # Fall back to this group's own variables; the fallback must not
            # leak the variable names of one group into the following ones.
            for variable in requested or cons_stats:
                print(' ', end='')
                print_stats_for_variable(variable, cons_stats[variable])
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command line parser of this script."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("result_dir")
    arg_parser.add_argument(
        "-w", "--warn",
        help='warn about imprecise data',
        action='store_true',
    )
    arg_parser.add_argument(
        "--print-experiments",
        help='print results from each individual experiment',
        action='store_true',
    )
    arg_parser.add_argument(
        "-i", "--implementations",
        help="implementations to collect",
        nargs='+',
    )
    arg_parser.add_argument(
        "-ix", "--exclude-implementations",
        help="implementations to exclude",
        nargs='+',
    )
    arg_parser.add_argument(
        "--print-stats",
        help="variables about which the collected statistics should be print",
        nargs='*',
        default=['iops'],
    )
    return arg_parser


def main():
    """Command line entry point: collect results, compute and print stats."""
    args = _build_arg_parser().parse_args()

    results = collect_results(
        args.result_dir,
        include=args.implementations,
        exclude=args.exclude_implementations,
        print_experiments=args.print_experiments,
    )
    stats = calculate_stats(results, args.warn)

    if args.print_stats is None:
        return
    print_stats(stats, variables=args.print_stats)
# Run the command line interface only when executed as a script.
if __name__ == '__main__':
    main()