Commit 213e5425 authored by Nicolas Pfeiffer's avatar Nicolas Pfeiffer Committed by Florian Schmaus
Browse files

Nowa: A wait-free continuation-stealing concurrency platform


Co-authored-by: Florian Schmaus's avatarFlorian Schmaus <flow@cs.fau.de>
parent e59ff3a8
[submodule "Hoard"]
path = Hoard
url = https://github.com/chaoran/Hoard
[submodule "fibril"]
path = fibril
url = git@gitlab.cs.fau.de:i4/manycore/fibril.git
branch = fibril-nowa-comparision-adjustments
The MIT License (MIT)
Copyright (c) 2014 Chaoran Yang
Original work Copyright (c) 2014 Chaoran Yang
Modified work Copyright (c) 2020 Nicolas Pfeiffer, Florian Schmaus
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
......
# fibril
Nowa: A Wait-Free Continuation-Stealing Concurrency Platform
============================================================
## Install
Nowa [1] is a non-blocking wait-free continuation-stealing runtime system
of a concurrency platform. Nowa is based on Fibril [2] which can be
found at https://github.com/chaoran/fibril/
```
./bootstrap
./configure
make
make install
```
Setup
-----
## Test
```
make check
./build
```
## Benchmark
By default, `make check` will run standard tests AND benchmarks. To run benchmarks only,
```
cd benchmark
make check
```
Test
----
To run the serial version of the benchmarks, do
```
cd benchmark/serial
make check
```
You can also compare the performance of **fibril** with **Intel CilkPlus** or **Intel Threading Building Blocks**. To run these versions, you need a compiler that supports these frameworks. GCC 5+ supports Intel CilkPlus natively. To run these benchmarks, do
```
cd benchmark/[cilkplus or tbb]
make check
```
References
----------
1: F. Schmaus et al., “Nowa: A Wait-Free Continuation-Stealing
Concurrency Platform”. In: 2021 IEEE International Parallel and
Distributed Processing Symposium (IPDPS). 2021.
2: C. Yang and J. Mellor-Crummey, “A practical solution to the cactus
stack problem”, in Proceedings of the 28th ACM Symposium on
Parallelism in Algorithms and Architectures, ser. SPAA ’16,
# Shared automake settings for the benchmark programs.

# The benchmark sources are looked up in the top-level test directory.
VPATH = $(top_srcdir)/test
# Use the headers from the fibril build tree and define BENCHMARK for all sources.
AM_CPPFLAGS = -I$(top_srcdir)/fibril/build/include/ -DBENCHMARK
# Link against the libfibril found in the fibril build tree.
AM_LDFLAGS = -L$(top_srcdir)/fibril/build/lib/ -lfibril
# Programs that are built by `make check`.
check_PROGRAMS = \
cholesky \
fft \
fib \
heat \
integrate \
knapsack \
lu \
matmul \
nqueens \
quicksort \
rectmul \
strassen
# These programs additionally link against the math library.
cholesky_LDADD = -lm
fft_LDADD = -lm
heat_LDADD = -lm
lu_LDADD = -lm
strassen_LDADD = -lm
# Every benchmark program is also run as a test case.
TESTS = $(check_PROGRAMS)
include $(srcdir)/../Makefile.am
# Reuse the definitions of the Makefile.am in the parent directory.
include $(srcdir)/../Makefile.am
# NOTE(review): a C++ standard is selected via AM_CFLAGS while the sources are
# forced to C++ with "-x c++" below -- confirm this is the intended variable.
AM_CFLAGS = -std=c++17
# Compile the sources as C++ with OpenMP enabled and FIBRIL_OPENMP defined.
AM_CPPFLAGS += -x c++ -DFIBRIL_OPENMP -fopenmp -fpermissive
#AM_LDFLAGS += -lomp -L/usr/lib -lstdc++
# NOTE(review): hard-coded, versioned path to libomp; linking only works on
# hosts where exactly this file exists.
AM_LDFLAGS += /usr/lib/llvm-11/lib/libomp-11.so.5 -lstdc++
# Reuse the definitions of the Makefile.am in the parent directory.
include $(srcdir)/../Makefile.am
# NOTE(review): a C++ standard is selected via AM_CFLAGS while the sources are
# forced to C++ with "-x c++" below -- confirm this is the intended variable.
AM_CFLAGS = -std=c++17
# Compile the sources as C++ with OpenMP enabled and FIBRIL_OPENMP defined.
AM_CPPFLAGS += -x c++ -DFIBRIL_OPENMP -fopenmp -fpermissive
# Let the compiler driver pick its OpenMP runtime via -fopenmp at link time.
AM_LDFLAGS += -fopenmp -lstdc++
#!/usr/bin/env bash
# Fetch the fibril submodule, then build and install first the submodule and
# afterwards this project. Each of the two is installed into a "build"
# directory inside its own source tree.

# Abort on the first failing step. Without this, a failed "cd fibril" or
# "./configure" would let the remaining commands run in the wrong directory
# or against a half-configured tree.
set -e

git submodule update --init --remote

# Build and install the fibril submodule.
cd fibril
./bootstrap
mkdir -p build
# Quote the prefix so that checkouts in paths containing spaces work.
./configure --prefix="$(pwd)/build"
make
make install
cd -

# Build and install this project.
./bootstrap
mkdir -p build
./configure --prefix="$(pwd)/build"
make
make install

exit 0
......@@ -46,5 +46,9 @@ AC_CONFIG_FILES([Makefile
benchmark/Makefile
benchmark/cilkplus/Makefile
benchmark/tbb/Makefile
benchmark/nowa/Makefile
benchmark/fibril/Makefile
benchmark/openmp/Makefile
benchmark/openmp-libomp/Makefile
benchmark/serial/Makefile])
AC_OUTPUT
Subproject commit e59ff3a8ce1770b218ea0d7c594194a2a0566958
/*.tex
/*.aux
/*.log
/*.pdf
/*.rubbercache
% Shared preamble for the generated plot documents.

% Packages for units (siunitx) and for drawing the plots (tikz, pgfplots).
\usepackage{siunitx}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.16}
\usepgfplotslibrary{groupplots}
% Colour palette used by the plots.
% NOTE(review): the hex values look like Paul Tol's "vibrant" scheme -- confirm.
\definecolor{vibrantBlue}{HTML}{0077BB}
\definecolor{vibrantCyan}{HTML}{33BBEE}
\definecolor{vibrantTeal}{HTML}{009988}
\definecolor{vibrantOrange}{HTML}{EE7733}
\definecolor{vibrantRed}{HTML}{CC3311}
\definecolor{vibrantMagenta}{HTML}{EE3377}
\definecolor{vibrantGrey}{HTML}{BBBBBB}
% Colour/marker combinations for successive plots; the generated plots select
% this list with "cycle list name=vibrant".
\pgfplotscreateplotcyclelist{vibrant}{
vibrantMagenta,every mark/.append style={fill=vibrantMagenta!85!black},mark=diamond*\\
vibrantCyan,every mark/.append style={fill=vibrantCyan!85!black},mark=square*\\
vibrantOrange,every mark/.append style={fill=vibrantOrange!85!black},mark=otimes*\\
vibrantTeal,every mark/.append style={fill=vibrantTeal!85!black},mark=triangle*\\
vibrantBlue,mark=star\\
}
\usepackage{xparse}
% Name of the runtime system; the plot scripts pass this macro as legend entry.
\NewDocumentCommand{\ourWaitFreeAlgo}{}{Nowa}
% Typesets a benchmark name in slanted text.
\NewDocumentCommand{\applicationName}{m}{\textsl{#1}}
% One shorthand macro per benchmark.
\NewDocumentCommand{\cholesky}{}{\applicationName{cholesky}}
\NewDocumentCommand{\fft}{}{\applicationName{fft}}
\NewDocumentCommand{\fib}{}{\applicationName{fib}}
\NewDocumentCommand{\heat}{}{\applicationName{heat}}
\NewDocumentCommand{\integrate}{}{\applicationName{integrate}}
\NewDocumentCommand{\knapsack}{}{\applicationName{knapsack}}
\NewDocumentCommand{\lu}{}{\applicationName{lu}}
\NewDocumentCommand{\matmul}{}{\applicationName{matmul}}
\NewDocumentCommand{\nqueens}{}{\applicationName{nqueens}}
\NewDocumentCommand{\quicksort}{}{\applicationName{quicksort}}
\NewDocumentCommand{\rectmul}{}{\applicationName{rectmul}}
\NewDocumentCommand{\strassen}{}{\applicationName{strassen}}
% Library names in typewriter font; the "g" of libgomp is emphasised.
\NewDocumentCommand{\libomp}{}{\texttt{libomp}}
\NewDocumentCommand{\libgomp}{}{\texttt{lib\emph{g}omp}}
#!/bin/bash
# Driver that generates the plot .tex files with generate_plot.py and builds
# PDFs from them.
#
# $1 prefix for plot tex files
# $2 path with data
if [ $# -ne 2 ]; then
    # Print an actual synopsis (on stderr) instead of the bare word "usage".
    echo "usage: $0 PREFIX DATA_PATH" >&2
    echo "  PREFIX     prefix for plot tex files" >&2
    echo "  DATA_PATH  path with data" >&2
    exit 1
fi

# Trace every command and stop at the first failing one.
set -x
set -e

# Start from an empty data.tex: generate_plot.py appends to this file when it
# is called with -r.
echo -n '' > data.tex
# Options of generate_plot.py used below:
#   -o=FILE  output tex file          -s=DIR  directory with the serial results
#   -b=LIST  benchmarks to plot       -c=N    number of plot columns
#   -l=N     number of legend columns -l      logarithmic scale
#   -u       uniform plot points      -r      also append \drefset entries to
#                                             data.tex (the first NAME:PATH is
#                                             the reference runtime)
# Every rubber call is put into the background so that building a PDF
# overlaps with generating the next plot.
# NOTE(review): $1 and $2 are partly unquoted; paths containing whitespace
# would be split -- confirm this is acceptable.

# Main comparison of all runtimes.
./generate_plot.py -o=$1.tex -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa Fibril:$2/fibril "Cilk Plus":$2/cilkplus TBB:$2/tbb -r -l=4
rubber --pdf $1.tex &
# CL-queue vs. THE-queue variants.
./generate_plot.py -o="$1-queues.tex" -s=$2/serial "\\ourWaitFreeAlgo{} (CL-queue)":$2/nowa "\\ourWaitFreeAlgo{} (THE-queue)":$2/nowa-the-queue Fibril:$2/fibril -b=cholesky,fib -c=2 -l=4
rubber --pdf "$1-queues.tex" &
./generate_plot.py -o="$1-queues-4.tex" -s=$2/serial "\\ourWaitFreeAlgo{} (CL-queue)":$2/nowa "\\ourWaitFreeAlgo{} (THE-queue)":$2/nowa-the-queue Fibril:$2/fibril -b=cholesky,fib,nqueens,matmul -c=2 -l=4
rubber --pdf "$1-queues-4.tex" &
# With vs. without madvise(), for growing sets of benchmarks.
./generate_plot.py -o="$1-with-madvise.tex" -s=$2/serial "\\ourWaitFreeAlgo{} w/o \\texttt{madvise()}":$2/nowa "\\ourWaitFreeAlgo{} w/ madvise":$2/nowa-madvise "Cilk Plus":$2/cilkplus -b=cholesky,lu -c=2 -l=4
rubber --pdf "$1-with-madvise.tex" &
./generate_plot.py -o="$1-with-madvise-4.tex" -s=$2/serial "\\ourWaitFreeAlgo{} w/o \\texttt{madvise()}":$2/nowa "\\ourWaitFreeAlgo{} w/ \\texttt{madvise()}":$2/nowa-madvise "Cilk Plus":$2/cilkplus -b=cholesky,lu,matmul,nqueens -c=2 -l=4
rubber --pdf "$1-with-madvise-4.tex" &
./generate_plot.py -o="$1-with-madvise-6.tex" -s=$2/serial "\\ourWaitFreeAlgo{} w/o \\texttt{madvise()}":$2/nowa "\\ourWaitFreeAlgo{} w/ \\texttt{madvise()}":$2/nowa-madvise "Cilk Plus":$2/cilkplus -b=cholesky,lu,matmul,nqueens,integrate,rectmul -c=2 -l=4
rubber --pdf "$1-with-madvise-6.tex" &
./generate_plot.py -o="$1-with-madvise-8.tex" -s=$2/serial "\\ourWaitFreeAlgo{} w/o \\texttt{madvise()}":$2/nowa "\\ourWaitFreeAlgo{} w/ \\texttt{madvise()}":$2/nowa-madvise "Cilk Plus":$2/cilkplus -b=cholesky,lu,heat,fib,matmul,nqueens,integrate,rectmul -c=2 -l=4
rubber --pdf "$1-with-madvise-8.tex" &
./generate_plot.py -o="$1-with-madvise-all.tex" -s=$2/serial "\\ourWaitFreeAlgo{} w/o \\texttt{madvise()}":$2/nowa "\\ourWaitFreeAlgo{} w/ \\texttt{madvise()}":$2/nowa-madvise "Cilk Plus":$2/cilkplus -c=2 -l=4
rubber --pdf "$1-with-madvise-all.tex" &
# Teaser plot: nqueens only.
./generate_plot.py -o="$1-teaser.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa Fibril:$2/fibril "Cilk Plus":$2/cilkplus TBB:$2/tbb -b=nqueens -l=4
rubber --pdf "$1-teaser.tex" &
# Appendix plots.
./generate_plot.py -o="$1-queues-appendix.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa "\\ourWaitFreeAlgo{} THE":$2/nowa-the-queue Fibril:$2/fibril -r -l=4
rubber --pdf "$1-queues-appendix.tex" &
#./generate_plot.py -o="$1-with-madvise-appendix.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa "\\ourWaitFreeAlgo{} w/ \\texttt{madvise()}":$2/nowa-madvise "Cilk Plus":$2/cilkplus -r -l=4
#rubber --pdf "$1-with-madvise-appendix.tex" &
# The tex file of this run is deleted right away; what remains of the run is
# what -r appended to data.tex.
# NOTE(review): presumably the run exists only for that side effect -- confirm.
./generate_plot.py -o="$1-fibril-with-madvise-appendix.tex" -s=$2/serial "Fibril":$2/fibril "Fibril w/ \\texttt{madvise()}":$2/fibril-madvise "Cilk Plus":$2/cilkplus -r -l=4
rm "$1-fibril-with-madvise-appendix.tex"
#rubber --pdf "$1-fibril-with-madvise-appendix.tex" &
./generate_plot.py -o="$1-with-madvise-appendix.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa Fibril:$2/fibril "Cilk Plus":$2/cilkplus "\\ourWaitFreeAlgo{} w/ \\texttt{madvise()}":$2/nowa-madvise "Fibril w/ \\texttt{madvise()}":$2/fibril-madvise -r -l=4
rubber --pdf "$1-with-madvise-appendix.tex" &
# ./generate_plot.py -o="$1-openmp-4.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa Fibril:$2/fibril "Cilk Plus":$2/cilkplus TBB:$2/tbb "OpenMP (libgomp)":$2/openmp -c=2 -l=5 -l -u -b=fib,heat,knapsack,strassen
# rubber --pdf "$1-openmp-4.tex" &
# ./generate_plot.py -o="$1-openmp-6.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa Fibril:$2/fibril "Cilk Plus":$2/cilkplus TBB:$2/tbb "OpenMP (libgomp)":$2/openmp -c=2 -l=5 -l -u -b=fft,fib,heat,knapsack,quicksort,strassen
# rubber --pdf "$1-openmp-6.tex" &
# OpenMP comparison with logarithmic scale (-l) and uniform plot points (-u).
./generate_plot.py -o="$1-openmp-full.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa TBB:$2/tbb "OpenMP (libgomp)":$2/openmp "OpenMP (libomp)":$2/openmp-gcc-libomp -l=5 -l -u -r
rubber --pdf "$1-openmp-full.tex" &
./generate_plot.py -o="$1-openmp-full-w-tied.tex" -s=$2/serial "\\ourWaitFreeAlgo{}":$2/nowa TBB:$2/tbb "\\libgomp{}":$2/openmp "\\libomp{} (untied)":$2/openmp-gcc-libomp "\\libomp{} (tied)":$2/openmp-gcc-libomp-tied-tasks -l=5 -l -u -r
rubber --pdf "$1-openmp-full-w-tied.tex" &
# Wait for the background rubber jobs before the script exits.
wait -f
#!/usr/bin/env python3
import sys
import csv
import statistics as stats
# Benchmarks that are plotted when no -b=... option selects a subset.
default_benchmarks = [
    'cholesky',
    'fft',
    'fib',
    'heat',
    'integrate',
    'knapsack',
    'lu',
    'matmul',
    'nqueens',
    'quicksort',
    'rectmul',
    'strassen',
]
# FIXME fix xtick for artifact / xmin / title
#title style={\n\
#at={(0.5,0.9)},\n\
#font=\small,\n\
#},\n\
#legend style={\n\
# at={(-0.00,1.15)},\n\
# anchor=south west,\n\
# font=\small,\n\
# draw=none,\n\
#},\n\
#height=4.5cm,\n\
#width=7cm,\n\
# Head of a generated plot document: LaTeX preamble plus the opening of the
# pgfplots groupplot environment with its options.
#
# The template has seven printf-style placeholders, in this order:
#   %s    group name
#   %d %d the two values of "group size=... by ..."
#   %.2f  horizontal separation between plots in cm
#   %d    number of legend columns
#   %s    x position of the legend
#   %s    further groupplot options
#         (presumably plotTopLinear or plotTopLogarithmic -- TODO confirm)
#
# Every LaTeX backslash is written as "\\". The previous spelling relied on
# "\d", "\i" and "\s" being unknown escapes that Python keeps literally, which
# triggers "invalid escape sequence" warnings on current Python versions.
plotTopGeneric = "\
\\documentclass{standalone}\n\
\\input{common.tex}\n\
\n\
\\begin{document}\n\
\\begin{tikzpicture}\n\
\\begin{groupplot}[\n\
group style={\n\
group name=%s,\n\
group size=%d by %d,\n\
vertical sep=0.65cm,\n\
horizontal sep=%.2fcm,\n\
xticklabels at=edge bottom,\n\
xlabels at=edge bottom,\n\
ylabels at=edge left,\n\
},\n\
cycle list name=vibrant,\n\
title style={\n\
at={(0.5,0.9)},\n\
font=\\small,\n\
},\n\
xmin=0,\n\
xtick pos=bottom,\n\
ytick pos=left,\n\
xminorticks=false,\n\
yminorticks=false,\n\
ymajorgrids=true,\n\
height=4.0cm,\n\
width=6.5cm,\n\
enlarge x limits={\n\
auto,\n\
value=0.04,\n\
},\n\
legend columns=%d,\n\
legend style={\n\
at={(%s,1.2)},\n\
anchor=south,\n\
font=\\small,\n\
draw=none,\n\
},\n\
%s\
]\n"
# Axis options for plots with linear axes.
plotTopLinear = (
    "xtick={64,128,192,256},\n"
    "ymin=0,\n"
)
# Axis options for plots with logarithmic axes.
plotTopLogarithmic = (
    "xmode=log,\n"
    "ymode=log,\n"
    "xtick={64,128,256},\n"
    "xticklabels={64,128,256},\n"
)
#at={(1.05,1.15)},\n\
#at={(0.5,1.15)},\n\
#anchor=south,\n\
# Tail of a plot document; the two %s placeholders sit between the end of the
# groupplot and the end of the tikzpicture.
plotBottom = (
    "\\end{groupplot}\n"
    "\n"
    "%s\n"
    "%s\n"
    "\n"
    "\\end{tikzpicture}\n"
    "\\end{document}"
)
# Start of an \addplot with symmetric error bars, including the header row
# (tab separated) of its inline table.
plotPlotMean = (
    "\\addplot+[sharp plot, error bars/.cd, y dir=both, y explicit]\n"
    "table [x=cores, y=speedup, y error=err]{\n"
    "cores\tspeedup\terr"
)
# Same as above, but with separate columns for the lower and upper error.
plotPlotMedian = (
    "\\addplot+[sharp plot, error bars/.cd, y dir=both, y explicit]\n"
    "table [x=cores, y=speedup, y error plus=err+, y error minus=err-]{\n"
    "cores\tspeedup\terr-\terr+"
)
def usage():
    """Print the command-line help of generate_plot.py to stdout.

    The text lists the arguments that main() actually accepts: options start
    with '-', options that take a value use the -KEY=VALUE form, and every
    other argument has to be a NAME:PATH tuple. The former text advertised
    positional OUTFILE and SERIAL arguments, which main() rejects.
    """
    print("usage:")
    print("./generate_plot.py -o=OUTFILE -s=SERIAL [OPTION]... NAME:PATH...")
    print("\t-o=OUTFILE\tfilename of generated plot tex file")
    print("\t-s=SERIAL\tpath to the .csv files with the serial times")
    print(
        "\tNAME:PATH\ttuples of the name in the plots and the path to the .csv files"
    )
    print("\t-b=A,B,...\tcomma-separated list of benchmarks to plot")
    print("\t-c=N\t\tnumber of plot columns (0: choose automatically)")
    print("\t-l=N\t\tnumber of legend columns (0: twice the plot columns)")
    print("\t-l\t\tuse logarithmic scale")
    print("\t-u\t\tuse uniform plot points")
    print("\t-r\t\tappend relative numbers to data.tex")
    print("\t-h\t\tprint this help and exit")
def fixName(name):
    """Return name with a backslash inserted in front of every underscore.

    All other characters are copied unchanged.
    """
    return "".join('\\' + ch if ch == '_' else ch for ch in name)
def do_dataref_replacement(name):
    """Turn a plot name into the plain text used inside data reference keys.

    The known LaTeX macros are replaced by plain words and every '/' is
    removed from the result.
    """
    macro_to_plain = (
        ('\\ourWaitFreeAlgo{}', 'nowa'),
        ('\\libomp{}', 'libomp'),
        ('\\libgomp{}', 'libgomp'),
    )
    plain_name = name
    for macro, plain in macro_to_plain:
        plain_name = plain_name.replace(macro, plain)
    # Slashes are dropped last, after all macros have been substituted.
    return plain_name.replace('/', '')
def main(args):
if len(args) < 2:
usage()
sys.exit(1)
keys = {
's': lambda v: locals().__setitem__('serialPath', v),
'o': lambda v: locals().__setitem__('output', v)
}
rts = []
benchmarks = default_benchmarks
columns = 0
plot_format = (2, 6)
generateRelativeNumbers = False
legendColumns = 0
logscale = False
uniformPlotPoints = False
for arg in args[1:]:
if arg[0] == '-':
if '=' in arg:
key, val = arg[1:].split('=', 1)
if key == 'o':
output = val
plot_name = output.split('.')[0]
elif key == 's':
serialPath = val
elif key == 'l':
try:
legendColumns = int(val)
except:
usage()
sys.exit(1)
elif key == 'c':
try:
columns = int(val)
except:
usage()
sys.exit(1)
elif key == 'b':
benchmarks = val.split(',')
else:
usage()
sys.exit(1)
else:
key = arg[1:]
if key == 'h':
usage()
sys.exit(0)
elif key == 'r':
generateRelativeNumbers = True
elif key == 'l':
logscale = True
elif key == 'u':
uniformPlotPoints = True
else:
usage()
sys.exit(1)
elif ':' in arg:
rts += [arg.split(':', 1)]
else:
usage()
sys.exit(1)
runtimes = dict(rts)
if columns == 0:
if len(benchmarks) <= 6:
columns = 1
else:
columns = 2
plot_format = (columns, int(len(benchmarks) / columns))
if (len(benchmarks) % columns) != 0:
print('WARNING: Not all columns of plots are filled!')
if legendColumns == 0:
legendColumns = 2 * columns
data = dict()
for r in runtimes:
data[r] = dict()
for b in benchmarks:
data[r][b] = dict()
with open(f"{runtimes[r]}/{b}.csv", newline='') as csvfile:
reader = csv.DictReader(csvfile, dialect='excel-tab')
for row in reader:
if data[r][b].get(row['cores']) == None:
data[r][b][row['cores']] = []
data[r][b][row['cores']] += [row]
serial = dict()
for b in benchmarks:
serial[b] = []
with open(f"{serialPath}/{b}.csv", newline='') as csvfile:
reader = csv.DictReader(csvfile, dialect='excel-tab')
for row in reader:
serial[b] += [row]
serialTimeMedi = dict()
stdevSerialTimeMedi = dict()
serialTimeMean = dict()
stdevSerialTimeMean = dict()
for b in benchmarks:
floatValues = list(map(lambda a: float(a['time']), serial[b]))
serialTimeMedi[b] = stats.median(floatValues)
stdevSerialTimeMedi[b] = stats.stdev(floatValues, serialTimeMedi[b])
serialTimeMean[b] = stats.mean(floatValues)
stdevSerialTimeMean[b] = stats.stdev(floatValues, serialTimeMean[b])
speedup = dict()
for r in runtimes:
speedup[r] = dict()
for b in benchmarks:
speedup[r][b] = []
for core_count in list(data[r][b]):
speedups = list(
map(lambda a: serialTimeMean[b] / float(a['time']),
data[r][b][core_count]))
# N.B. using geometric mean here.
mean_speedup = stats.geometric_mean(speedups)
standard_deviation = stats.stdev(speedups, mean_speedup)
speedup[r][b] += [
(core_count, mean_speedup, standard_deviation)
]
# Sort the speedup dict by core count.
# This is usually not required, as the core count is already sorted.
speedup[r][b].sort(key=lambda a: int(a[0]))
if generateRelativeNumbers:
r_ref = rts[0][0]
if False:
mc = str(
max(map(lambda x: int(x), data[r_ref][benchmarks[0]].keys())))
for r in runtimes:
for b in benchmarks:
d = data[r][b][mc]
rss = sum(map(lambda x: int(x['rss']), d))
d.sort(key=lambda x: int(x['run']))
maxrss = d[len(d) - 1]['maxrss']
print(f'{r} {b} {rss} {maxrss}')
pass
max_cores = len(speedup[r_ref][benchmarks[0]]) - 1
rrp = do_dataref_replacement(r_ref)
output_relative = 'data.tex'
with open(output_relative, mode='at', newline='') as outfile:
max_runs = len(serial[benchmarks[0]])
print(f'\\drefset{{/meta/maxruns}}{{{max_runs}}}', file=outfile)
for r in runtimes:
rp = do_dataref_replacement(r)
mc = str(
max(
map(lambda x: int(x),
data[r_ref][benchmarks[0]].keys())))
for b in benchmarks:
d = data[r][b].get(mc, None)
if d is None:
print(f"Warning: No data available for {r}/{b}/{mc} cores", file=sys.stderr)
continue
rss = round(
int(sum(map(lambda x: int(x['rss']), d))) / 1024, 0)
d.sort(key=lambda x: int(x['run']))
maxrss = round(int(d[len(d) - 1]['maxrss']) / 1024, 0)
entry = speedup[r][b][len(speedup[r][b]) - 1]
times = list(map(lambda a: float(a['time']), data[r][b][mc]))
spdupMean = round(entry[1], 3)
spdupStdev = round(entry[2], 3)
tMean = round(stats.mean(times), 3)
tStdev = round(stats.stdev(times, tMean), 3)
print(
f'\\drefset[unit=\\mebi\\byte]{{/mem/rss/{rp.lower()}/{b}}}{{{rss}}}',
file=outfile)
print(
f'\\drefset[unit=\\mebi\\byte]{{/mem/maxrss/{rp.lower()}/{b}}}{{{maxrss}}}',
file=outfile)
print(
f'\\drefset[unit=\\second]{{/perf/abs/time/mean/{rp.lower()}/{b}}}{{{tMean}}}',
file=outfile)
print(
f'\\drefset[unit=\\second]{{/perf/abs/time/stdev/{rp.lower()}/{b}}}{{{tStdev}}}',
file=outfile)
print(
f'\\drefset{{/perf/abs/speedup/mean/{rp.lower()}/{b}}}{{{spdupMean}}}',
file=outfile)
print(
f'\\drefset{{/perf/abs/speedup/stdev/{rp.lower()}/{b}}}{{{spdupStdev}}}',
file=outfile)
if speedup.get(r) == None:
print(f"Ignoring {r} as there are no speedup values", file=sys.stderr)
if r == r_ref:
continue
divs = dict()
best = 0
worst = 10e10
for b in benchmarks: