Commit 92f9e30d authored by Florian Fischer's avatar Florian Fischer
Browse files

add useful chattymalloc support and --nolibmemusage flag

improve bench.py
improve process_stdout hook -> process_output
rename perf_cmd -> measure_cmd
parent e6ccf23e
......@@ -22,6 +22,7 @@ parser.add_argument("-b", "--benchmarks", help="benchmarks to run", nargs='+')
parser.add_argument("-ns", "--nosum", help="don't produce plots", action='store_true')
parser.add_argument("-sd", "--summarydir", help="directory where all plots and the summary go", type=str)
parser.add_argument("-a", "--analyse", help="collect allocation sizes", action='store_true')
parser.add_argument("--nolibmemusage", help="don't use libmemusage to analyse", action='store_true')
def main():
args = parser.parse_args()
......@@ -44,16 +45,16 @@ def main():
if args.analyse and hasattr(bench, "analyse") and callable(bench.analyse):
print("Analysing", bench.name, "...")
bench.analyse(verbose=args.verbose)
analyse_args = {"nolibmemusage": args.nolibmemusage, "verbose": args.verbose}
bench.analyse(**analyse_args)
print("Running", bench.name, "...")
if not bench.run(runs=args.runs, verbose=args.verbose):
continue
if args.save:
bench.save()
if not args.nosum:
if not args.nosum and not (args.runs < 1 and not args.load):
print("Summarizing", bench.name, "...")
bench.summary(args.summarydir)
......
......@@ -4,6 +4,7 @@ import csv
import itertools
import os
import pickle
import shutil
import subprocess
from common_targets import common_targets
......@@ -14,7 +15,7 @@ class Benchmark (object):
"name" : "default_benchmark",
"description" : "This is the default benchmark description please add your own useful one.",
"perf_cmd" : "perf stat -x, -dd ",
"measure_cmd" : "perf stat -x, -dd ",
"analyse_cmd" : "memusage -p {} -t ",
"cmd" : "true",
"targets" : common_targets,
......@@ -114,64 +115,63 @@ class Benchmark (object):
yield p
def analyse(self, verbose=False):
for perm in self.iterate_args():
def analyse(self, verbose=False, nolibmemusage=True):
if not nolibmemusage and not shutil.which("memusage"):
print("memusage not found. Using chattymalloc.")
libmemusage = False
if nolibmemusage:
import chattyparser
actual_cmd = ""
old_preload = os.environ.get("LD_PRELOAD", None)
os.environ["LD_PRELOAD"] = "build/chattymalloc.so"
n = len(list(self.iterate_args()))
for i, perm in enumerate(self.iterate_args()):
print(i + 1, "of", n, "\r", end='')
perm = perm._asdict()
file_name = self.name + "."
file_name += ".".join([str(x) for x in perm.values()])
file_name += ".memusage"
actual_cmd = self.analyse_cmd.format(file_name + ".png")
if not nolibmemusage:
actual_cmd = self.analyse_cmd.format(file_name + ".png")
if "binary_suffix" in self.cmd:
perm["binary_suffix"] = ""
actual_cmd += self.cmd.format(**perm)
with open(file_name + ".hist", "w") as f:
res = subprocess.run(actual_cmd.split(),
res = subprocess.run(actual_cmd.split(),
stdout=subprocess.PIPE,
stderr=f,
stderr=subprocess.PIPE,
universal_newlines=True)
if res.returncode != 0:
print(actual_cmd, "failed.")
print("Aborting analysing.")
print("You may look at", file_name + ".hist", "to fix this.")
return
def parse_chattymalloc_data(self, path="chattymalloc.data"):
hist = {}
total = 0
with open(path, "r") as f:
for l in f.readlines():
total += 1
if res.returncode != 0:
print(actual_cmd, "failed.")
print("Stdout:", res.stdout)
print("Stderr:", res.stderr)
print("Aborting analysing.")
return
if nolibmemusage:
try:
n = int(l)
except ValueError:
pass
hist[n] = hist.get(n, 0) + 1
hist["total"] = total
return hist
def plot_hist_ascii(self, hist, path):
total = hist["total"]
del(hist["total"])
bins = {}
bin = 1
for size in sorted(hist):
if int(size) > bin * 16:
bin += 1
bins[bin] = bins.get(bin, 0) + hist[size]
hist["total"] = total
with open(path, "w") as f:
print("Total malloc calls:", total, file=f)
print("Histogram of sizes:", file=f)
for b in sorted(bins):
perc = bins[b]/total*100
print((b-1)*16, '-', b*16-1, '\t', bins[b],
perc, '%', '*'*int(perc/2), file=f)
hist, calls, reqsize, top5reqsize = chattyparser.parse()
top5 = [s[1] for s in sorted([(n, s) for s, n in hist.items()])]
hist, calls, reqsize, top5reqsize = chattyparser.parse(track_top5=top5)
chattyparser.plot_hist_ascii(hist, calls, file_name + ".hist")
chattyparser.plot_profile(reqsize, top5reqsize, file_name + ".profile.png")
except MemoryError as memerr:
print("Can't Analyse", actual_cmd, "with chattymalloc because",
"to much memory would be needed.")
continue
os.environ["LD_PRELOAD"] = old_preload or ""
print()
def run(self, verbose=False, runs=5):
if runs > 0:
print("Running", self.name, "...")
n = len(list(self.iterate_args())) * len(self.targets)
for run in range(1, runs + 1):
print(str(run) + ". run")
......@@ -190,9 +190,9 @@ class Benchmark (object):
for perm in self.iterate_args():
i += 1
print(i, "of", n, "\r", end='')
print(i, "of", n,"\r", end='')
actual_cmd = self.perf_cmd
actual_cmd = self.measure_cmd
perm_dict = perm._asdict()
perm_dict.update(t)
......@@ -227,11 +227,12 @@ class Benchmark (object):
break
os.remove("status")
if hasattr(self, "process_stdout"):
self.process_stdout(result, res.stdout, verbose)
if hasattr(self, "process_output"):
self.process_output(result, res.stdout, res.stderr,
tname, perm, verbose)
# Parse perf output if available
if self.perf_cmd != "":
if self.measure_cmd != self.defaults["measure_cmd"]:
csvreader = csv.reader(res.stderr.splitlines(), delimiter=',')
for row in csvreader:
# Split of the user/kernel space info to be better portable
......
#define _GNU_SOURCE
#include <dlfcn.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
static char tmpbuff[1024];
static unsigned long tmppos = 0;
static unsigned long tmpallocs = 0;
static FILE* out = NULL;
static int in_fprintf = 0;
static int out = -1;
static int prevent_recursion = 0;
/*=========================================================
* * interception points
......@@ -20,22 +24,42 @@ static void * (*myfn_malloc)(size_t size);
static void (*myfn_free)(void* ptr);
static void * (*myfn_calloc)(size_t nmemb, size_t size);
static void * (*myfn_realloc)(void* ptr, size_t size);
static void * (*myfn_memalign)(size_t alignment, size_t size);
/*
 * Write one printf-style formatted record to the trace file descriptor `out`.
 *
 * The record is silently dropped while another write_output() call is still
 * in progress (prevent_recursion is set) -- presumably because vdprintf() can
 * itself end up in the interposed allocation functions, which would otherwise
 * recurse back into this function.
 *
 * errno is saved and restored around the write: the allocation wrappers call
 * this function between invoking the real allocator and returning to the
 * application, so an error code set by the real allocator must not be
 * overwritten by vdprintf().
 *
 * NOTE: the lockf()-based locking of `out` is not enabled here (it was
 * commented out in the original code as well).
 */
static void write_output(const char* fmt, ...)
{
	if (prevent_recursion)
		return;

	prevent_recursion = 1;

	int saved_errno = errno;

	va_list args;
	va_start(args, fmt);
	vdprintf(out, fmt, args);
	va_end(args);

	errno = saved_errno;

	prevent_recursion = 0;
}
static void init()
{
out = fopen("chattymalloc.data", "w");
if (out == NULL)
out = open("chattymalloc.data", O_WRONLY | O_TRUNC | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
if (out == -1)
{
fprintf(stderr, "failed to open output file\n");
fprintf(stderr, "failed to open output file with %d\n", errno);
exit(1);
}
myfn_malloc = dlsym(RTLD_NEXT, "malloc");
myfn_free = dlsym(RTLD_NEXT, "free");
myfn_calloc = dlsym(RTLD_NEXT, "calloc");
myfn_realloc = dlsym(RTLD_NEXT, "realloc");
myfn_calloc = dlsym(RTLD_NEXT, "calloc");
myfn_realloc = dlsym(RTLD_NEXT, "realloc");
myfn_memalign = dlsym(RTLD_NEXT, "memalign");
if (!myfn_malloc || !myfn_free || !myfn_calloc || !myfn_realloc)
if (!myfn_malloc || !myfn_free || !myfn_calloc || !myfn_realloc || !myfn_memalign)
{
fprintf(stderr, "Error in `dlsym`: %s\n", dlerror());
exit(1);
......@@ -66,19 +90,15 @@ void *malloc(size_t size)
}
else
{
fprintf(stderr, "%d in %d allocs\n", tmppos, tmpallocs);
fprintf(stderr, "jcheck: too much memory requested during initialisation - increase tmpbuff size\n");
exit(1);
}
}
}
if (!in_fprintf)
{
in_fprintf = 1;
fprintf(out, "%d\n", size);
in_fprintf = 0;
}
void *ptr = myfn_malloc(size);
write_output("m %zu %p\n", size, ptr);
return ptr;
}
......@@ -88,7 +108,10 @@ void free(void *ptr)
if (myfn_malloc == NULL)
init();
if (!(ptr >= (void*) tmpbuff && ptr <= (void*)(tmpbuff + tmppos)))
{
write_output("f %p\n", ptr);
myfn_free(ptr);
}
}
void* realloc(void *ptr, size_t size)
......@@ -104,13 +127,9 @@ void* realloc(void *ptr, size_t size)
return nptr;
}
if (!in_fprintf)
{
in_fprintf = 1;
fprintf(out, "%d\n", size);
in_fprintf = 0;
}
return myfn_realloc(ptr, size);
void* nptr = myfn_realloc(ptr, size);
write_output("r %p %zu %p\n", ptr, size, nptr);
return nptr;
}
void* calloc(size_t nmemb, size_t size)
......@@ -123,11 +142,20 @@ void* calloc(size_t nmemb, size_t size)
return ptr;
}
if (!in_fprintf)
void* ptr = myfn_calloc(nmemb, size);
write_output("c %zu %zu %p\n", nmemb, size, ptr);
return ptr;
}
/*
 * Interposed memalign(): forwards the request to the real memalign resolved
 * into myfn_memalign and logs one "mm <alignment> <size> <ptr>" record.
 *
 * Unlike the other hooks there is no temporary-buffer fallback here: if the
 * real function has not been resolved yet the process is terminated.
 */
void* memalign(size_t alignment, size_t size)
{
	if (myfn_memalign == NULL)
	{
		fprintf(stderr, "called memalign before or during init\n");
		exit(1);
	}

	void* ptr = myfn_memalign(alignment, size);
	write_output("mm %zu %zu %p\n", alignment, size, ptr);
	return ptr;
}
import re
rss_re = re.compile("^VmRSS:\s+(\d+) kB$")
import matplotlib.pyplot as plt
import numpy as np
# Regex fragments shared by the trace-line patterns below.  Raw strings are
# used so that "\d", "\w" and "\(" reach the regex engine unchanged instead of
# being treated as (invalid) string escape sequences.
ptr = r"(?:0x)?(?P<ptr>(?:\w+)|(?:\(nil\)))"
size = r"(?P<size>\d+)"
# Not referenced by the patterns compiled below; kept as module-level names.
time = r"(?P<time>\d+)"
tid = r"(?P<tid>\d+)"

# One pattern per record type in the trace file:
#   m <size> <ptr>                 malloc
#   f <ptr>                        free
#   c <nmemb> <size> <ptr>         calloc
#   r <old ptr> <size> <new ptr>   realloc
#   mm <alignment> <size> <ptr>    memalign
malloc_re = re.compile(r"^m {} {}$".format(size, ptr))
free_re = re.compile(r"^f {}$".format(ptr))
calloc_re = re.compile(r"^c (?P<nmemb>\d+) {} {}$".format(size, ptr))
# The second pointer of a realloc record is captured as "nptr".
realloc_re = re.compile(r"^r {} {} {}$".format(ptr, size, ptr.replace("ptr", "nptr")))
memalign_re = re.compile(r"^mm (?P<alignment>\d+) {} {}$".format(size, ptr))
def record_allocation(hist, total_size, top5, top5_sizes, allocations, ptr, size, optr=None, add=True):
    """Record one allocator event in the running statistics.

    All container arguments are updated in place; nothing is returned.

    Args:
        hist: dict mapping requested size -> number of requests of that size.
        total_size: list of running totals of live requested memory; one
            value is appended per recorded event.
        top5: iterable of sizes that are tracked individually.
        top5_sizes: dict mapping each size in ``top5`` to its own running
            total list; one value is appended per recorded event so the
            lists stay as long as ``total_size``.
        allocations: dict mapping pointer string -> size of the live block.
        ptr: pointer returned by the call (or freed, when ``add`` is False).
        size: requested size; converted with ``int()`` for allocations and
            ignored for frees.
        optr: for realloc, the pointer that was passed in.
        add: True for an allocation, False for a free.

    A free of ``"(nil)"`` or of a pointer that is not in ``allocations`` is
    ignored.  For a realloc of a known block the block keeps its full new
    size in ``allocations`` and ``hist``; only the running totals are adjusted
    by the difference to the old size.
    """
    if not add:
        if ptr == "(nil)" or ptr not in allocations:
            return
        freed = allocations.pop(ptr)
        total_size.append(total_size[-1] - freed)
        for s in top5:
            top5_sizes[s].append(top5_sizes[s][-1] - (s if s == freed else 0))
        return

    size = int(size)
    # Refuse to continue a corrupted running total; checked before anything
    # is modified so the containers are never left half-updated.
    if not isinstance(total_size[-1], int):
        print("invalid type", type(total_size[-1]), type(size))
        return

    # Size of the block a realloc replaces, or None if there is none.
    replaced = allocations.pop(optr, None) if optr else None

    allocations[ptr] = size
    hist[size] = hist.get(size, 0) + 1
    total_size.append(total_size[-1] + size - (replaced or 0))
    for s in top5:
        delta = (s if s == size else 0) - (s if s == replaced else 0)
        top5_sizes[s].append(top5_sizes[s][-1] + delta)
def parse(path="chattymalloc.data", track_top5=None):
    """Parse a chattymalloc trace file and collect allocation statistics.

    Every line is matched against the malloc, free, calloc, realloc and
    memalign patterns (in that order) and handed to ``record_allocation``.
    Lines matching none of the patterns are reported on stdout and skipped.

    Args:
        path: trace file to read.
        track_top5: sizes whose live total should be tracked separately;
            ``None`` (the default) tracks none.

    Returns:
        A tuple ``(hist, calls, requested_size, requested_size_top5)``:
        the size histogram, a dict with the number of calls per function,
        the list of running totals of requested memory (starting with 0) and
        a dict mapping every tracked size to its own running-total list.
    """
    track_top5 = [] if track_top5 is None else track_top5

    calls = {"malloc": 0, "free": 0, "calloc": 0, "realloc": 0, "memalign": 0}
    allocations = {}
    requested_size = [0]
    requested_size_top5 = {s: [0] for s in track_top5}
    hist = {}

    with open(path, "r") as f:
        # Iterate the file lazily; traces can be large.
        for ln, l in enumerate(f, start=1):
            res = malloc_re.match(l)
            if res is not None:
                res = res.groupdict()
                record_allocation(hist, requested_size, track_top5, requested_size_top5,
                                  allocations, res["ptr"], res["size"])
                calls["malloc"] += 1
                continue

            res = free_re.match(l)
            if res is not None:
                res = res.groupdict()
                record_allocation(hist, requested_size, track_top5, requested_size_top5,
                                  allocations, res["ptr"], 0, add=False)
                calls["free"] += 1
                continue

            res = calloc_re.match(l)
            if res is not None:
                res = res.groupdict()
                # calloc requests nmemb elements of size bytes each
                size = int(res["nmemb"]) * int(res["size"])
                record_allocation(hist, requested_size, track_top5, requested_size_top5,
                                  allocations, res["ptr"], size)
                calls["calloc"] += 1
                continue

            res = realloc_re.match(l)
            if res is not None:
                res = res.groupdict()
                record_allocation(hist, requested_size, track_top5, requested_size_top5,
                                  allocations, res["nptr"], res["size"], optr=res["ptr"])
                calls["realloc"] += 1
                continue

            res = memalign_re.match(l)
            if res is not None:
                res = res.groupdict()
                record_allocation(hist, requested_size, track_top5, requested_size_top5,
                                  allocations, res["ptr"], res["size"])
                calls["memalign"] += 1
                continue

            print("\ninvalid line at", ln, ":", l)

    return hist, calls, requested_size, requested_size_top5
def hist(path="chattymalloc.data"):
    """Return only the size histogram (first element of ``parse``'s result) for the trace at *path*."""
    return parse(path=path)[0]
def plot_profile(total_size, total_top5, path):
    """Draw the memory profile and save the figure to *path*.

    ``total_size`` is plotted as the "Total requested" curve and every entry
    of ``total_top5`` as an additional curve labelled with its key.  All
    curves share one x axis that simply numbers the entries of
    ``total_size``.  The current figure is cleared after saving.

    NOTE(review): the y axis is labelled "mem in kb" but no unit conversion
    happens here -- confirm the unit of the values the caller passes in.
    """
    event_axis = list(range(len(total_size)))

    plt.plot(event_axis, total_size, marker='', linestyle='-', label="Total requested")
    for tracked in total_top5:
        plt.plot(event_axis, total_top5[tracked], label=tracked)

    plt.legend()
    plt.xlabel("Allocations")
    plt.ylabel("mem in kb")
    plt.title("Memusage profile")

    plt.savefig(path)
    plt.clf()
def plot_hist_ascii(hist, calls, path, bin_width=16):
    """Write a textual call summary and size histogram to *path*.

    Args:
        hist: dict mapping requested size -> number of requests.
        calls: dict with the call counts for "malloc", "calloc", "realloc",
            "free" and "memalign".
        path: file that is created or overwritten with the report.
        bin_width: width of one histogram bucket; bucket ``i`` covers the
            sizes ``i * bin_width`` up to ``(i + 1) * bin_width - 1``.

    Each histogram line shows the bucket range, its number of requests, its
    share of the total number of function calls with two decimals, and a bar
    with one ``*`` per two percent.
    """
    bins = {}
    for size, count in hist.items():
        # Compute the bucket directly from the size so sparse histograms
        # (e.g. only sizes 8 and 1000) end up in the correct buckets.
        index = int(size) // bin_width
        bins[index] = bins.get(index, 0) + count

    total = sum(calls.values())

    with open(path, "w") as f:
        print("Total function calls:", total, file=f)
        for name in ("malloc", "calloc", "realloc", "free", "memalign"):
            print(name + ":", calls[name], file=f)

        print("Histogram of sizes:", file=f)
        hist_line = "{} - {}\t{}\t{:.2f}% {}"
        for index in sorted(bins):
            # Guard against an all-zero calls dict combined with a non-empty hist.
            perc = bins[index] / total * 100 if total else 0.0
            print(hist_line.format(index * bin_width, (index + 1) * bin_width - 1,
                                   bins[index], perc, '*' * int(perc / 2)), file=f)
......@@ -77,24 +77,25 @@ class Benchmark_DJ_Trace( Benchmark ):
sys.stderr.write("\n")
return True
def process_stdout(self, result, stdout, verbose):
def process_output(self, result, stdout, target, perm, verbose):
def to_int(s):
return int(s.replace(',', ""))
regexs = {7:malloc_re ,8:calloc_re, 9:realloc_re, 10:free_re}
functions = {7:"malloc", 8:"calloc", 9:"realloc", 10:"free"}
for i, l in enumerate(stdout.splitlines()):
if i == 3:
result["Max_RSS"] = to_int(max_rss_re.match(l).group("rss"))
elif i == 4:
result["Ideal_RSS"] = to_int(ideal_rss_re.match(l).group("rss"))
elif i == 7:
result["avg_malloc"] = to_int(malloc_re.match(l).group("time"))
elif i == 8:
result["avg_calloc"] = to_int(calloc_re.match(l).group("time"))
elif i == 9:
result["avg_realloc"] = to_int(realloc_re.match(l).group("time"))
elif i == 10:
result["avg_free"] = to_int(free_re.match(l).group("time"))
elif i in [7, 8, 9, 10]:
res = regexs[i].match(l)
fname = functions[i]
result["avg_" + fname] = to_int(res.group("time"))
if not perm.workload in self.results:
self.results[perm.workload] = {"malloc_calls":0, "calloc_calls":0,
"realloc_calls":0, "free_calls":0}
self.results[perm.workload][fname + "_calls"] = res.group("calls")
def summary(self, sd=None):
args = self.results["args"]
......@@ -116,20 +117,24 @@ class Benchmark_DJ_Trace( Benchmark ):
plt.clf()
# Function Times
xa = np.arange(0, 6, 1.5)
for perm in self.iterate_args():
for i, target in enumerate(targets):
x_vals = [x-i/8 for x in range(0,4)]
x_vals = [x-i/len(targets) for x in xa]
y_vals = [0] * 4
y_vals[0] = np.mean([x["avg_malloc"] for x in self.results[target][perm]])
y_vals[1] = np.mean([x["avg_calloc"] for x in self.results[target][perm]])
y_vals[2] = np.mean([x["avg_realloc"] for x in self.results[target][perm]])
y_vals[3] = np.mean([x["avg_free"] for x in self.results[target][perm]])
plt.bar(x_vals, y_vals, width=0.2, align="center",
plt.bar(x_vals, y_vals, width=0.25, align="center",
label=target, color=targets[target]["color"])
plt.legend(loc="best")
plt.xticks(range(0,4), ["malloc", "calloc", "realloc", "free"])
plt.ylabel("Avg time in ms")
plt.xticks(xa, ["malloc\n" + str(self.results[perm.workload]["malloc_calls"]) + "\ncalls",
"calloc\n" + str(self.results[perm.workload]["calloc_calls"]) + "\ncalls",
"realloc\n" + str(self.results[perm.workload]["realloc_calls"]) + "\ncalls",
"free\n" + str(self.results[perm.workload]["free_calls"]) + "\ncalls"])
plt.ylabel("Avg ticks per function")
plt.title("Avg API call times " + perm.workload + ":")
plt.savefig(os.path.join(sd, ".".join([self.name, perm.workload, "apitimes", "png"])))
plt.clf()
......
......@@ -26,7 +26,7 @@ class Benchmark_Falsesharing( Benchmark ):
self.requirements = ["build/cache-thrash", "build/cache-scratch"]
super().__init__()
def process_stdout(self, result, stdout, verbose):
def process_output(self, result, stdout, stderr, target, perm, verbose):
result["time"] = time_re.match(stdout).group("time")
def summary(self, sd=None):
......
......@@ -26,7 +26,7 @@ class Benchmark_Larson( Benchmark ):
self.requirements = ["build/larson"]
super().__init__()
def process_stdout(self, result, stdout, verbose):
def process_output(self, result, stdout, stderr, target, perm, verbose):
for l in stdout.splitlines():
res = throughput_re.match(l)
if res:
......
......@@ -36,7 +36,7 @@ class Benchmark_MYSQL( Benchmark ):
self.args = {"nthreads" : range(1, multiprocessing.cpu_count() * 4 + 1, 2)}