diff --git a/eval.py b/eval.py
index 3869ad9b24155b4965230206b5d2414237424a47..864bb889bbfe19666b0ca346d3d8bf6c26a0a881 100755
--- a/eval.py
+++ b/eval.py
@@ -89,11 +89,30 @@ def prepare_env(update_env: T.MutableMapping) -> T.Dict:
     return current_env
 
 
+PERF_EXE = 'perf'
+PERF_EVENT_SELECTION = '-dd'
+
+
 def main(args):
     """Run an evaluation"""
     for target, target_conf in TARGETS.items():
         cmd = target_conf['cmd']
 
+        if args.flamegraph:
+            perf_out = RESULT_DIR / f'{target}.perf.data'
+            cmd = f'{PERF_EXE} record --call-graph dwarf -o {perf_out} {cmd}'
+
+        elif args.perf_stats or args.perf_record:
+            perf_event_selection = ','.join(
+                args.perf_stats) if args.perf_stats else PERF_EVENT_SELECTION
+
+            if args.perf_record:
+                perf_out = RESULT_DIR / f'{target}.perf.data'
+                cmd = f'{PERF_EXE} record -g {perf_event_selection} -o {perf_out} {cmd}'
+            else:
+                perf_out = RESULT_DIR / f'{target}.perf.stats'
+                cmd = f'{PERF_EXE} stat {perf_event_selection} -x, -o {perf_out} {cmd}'
+
         print(f"measuring {target} ...\u001b[K\r", end='')
         stats_file = RESULT_DIR / f'{target}.stats'
 
@@ -118,6 +137,18 @@ def main(args):
             os.remove(err_path)
 
 
+def generate_flamegraphs(result_dir):
+    """generate flamegraphs from recorded perf data files"""
+    for path in result_dir.iterdir():
+        if path.suffix != '.data':
+            continue
+
+        print(f'\rGenerating flamgraph from {path.name} ...\u001b[K', end='')
+        cmd = f'{ROOT_DIR/"tools"/"generate-flamegraph.sh"} {path}'
+        subprocess.run(cmd.split(), check=True)
+    print()
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('-v',
@@ -135,6 +166,15 @@ if __name__ == '__main__':
     parser.add_argument('--desc-stats',
                         help='file to store descriptive statistics',
                         type=str)
+    parser.add_argument('--perf-stats',
+                        help='use perf to collect performance counter stats',
+                        nargs='*')
+    parser.add_argument('--perf-record',
+                        help='use perf to record a profile',
+                        action='store_true')
+    parser.add_argument('--flamegraph',
+                        help='generate flamegraphs',
+                        action='store_true')
 
     _args = parser.parse_args()
 
@@ -147,6 +187,11 @@ if __name__ == '__main__':
                              _args.exclude_implementations)
     main(_args)
 
+    if _args.flamegraph:
+        print()
+        generate_flamegraphs(RESULT_DIR)
+        sys.exit(0)
+
     _data = collect(result_dir=RESULT_DIR)
     if _data is None:
         print(f'Error: no data was collected from {RESULT_DIR}',
diff --git a/tools/flamegraph.pl b/tools/flamegraph.pl
new file mode 100755
index 0000000000000000000000000000000000000000..9b9898e526f31081a8461755e200280da059d5aa
--- /dev/null
+++ b/tools/flamegraph.pl
@@ -0,0 +1,1243 @@
+#!/usr/bin/perl -w
+#
+# flamegraph.pl		flame stack grapher.
+#
+# This takes stack samples and renders a call graph, allowing hot functions
+# and codepaths to be quickly identified.  Stack samples can be generated using
+# tools such as DTrace, perf, SystemTap, and Instruments.
+#
+# USAGE: ./flamegraph.pl [options] input.txt > graph.svg
+#
+#        grep funcA input.txt | ./flamegraph.pl [options] > graph.svg
+#
+# Then open the resulting .svg in a web browser, for interactivity: mouse-over
+# frames for info, click to zoom, and ctrl-F to search.
+#
+# Options are listed in the usage message (--help).
+#
+# The input is stack frames and sample counts formatted as single lines.  Each
+# frame in the stack is semicolon separated, with a space and count at the end
+# of the line.  These can be generated for Linux perf script output using
+# stackcollapse-perf.pl, for DTrace using stackcollapse.pl, and for other tools
+# using the other stackcollapse programs.  Example input:
+#
+#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1
+#
+# An optional extra column of counts can be provided to generate a differential
+# flame graph of the counts, colored red for more, and blue for less.  This
+# can be useful when using flame graphs for non-regression testing.
+# See the header comment in the difffolded.pl program for instructions.
+#
+# The input functions can optionally have annotations at the end of each
+# function name, following a precedent by some tools (Linux perf's _[k]):
+# 	_[k] for kernel
+#	_[i] for inlined
+#	_[j] for jit
+#	_[w] for waker
+# Some of the stackcollapse programs support adding these annotations, eg,
+# stackcollapse-perf.pl --kernel --jit. They are used merely for colors by
+# some palettes, eg, flamegraph.pl --color=java.
+#
+# The output flame graph shows relative presence of functions in stack samples.
+# The ordering on the x-axis has no meaning; since the data is samples, time
+# order of events is not known.  The order used sorts function names
+# alphabetically.
+#
+# While intended to process stack samples, this can also process stack traces.
+# For example, tracing stacks for memory allocation, or resource usage.  You
+# can use --title to set the title to reflect the content, and --countname
+# to change "samples" to "bytes" etc.
+#
+# There are a few different palettes, selectable using --color.  By default,
+# the colors are selected at random (except for differentials).  Functions
+# called "-" will be printed gray, which can be used for stack separators (eg,
+# between user and kernel stacks).
+#
+# HISTORY
+#
+# This was inspired by Neelakanth Nadgir's excellent function_call_graph.rb
+# program, which visualized function entry and return trace events.  As Neel
+# wrote: "The output displayed is inspired by Roch's CallStackAnalyzer which
+# was in turn inspired by the work on vftrace by Jan Boerhout".  See:
+# https://blogs.oracle.com/realneel/entry/visualizing_callstacks_via_dtrace_and
+#
+# Copyright 2016 Netflix, Inc.
+# Copyright 2011 Joyent, Inc.  All rights reserved.
+# Copyright 2011 Brendan Gregg.  All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# 11-Oct-2014	Adrien Mahieux	Added zoom.
+# 21-Nov-2013   Shawn Sterling  Added consistent palette file option
+# 17-Mar-2013   Tim Bunce       Added options and more tunables.
+# 15-Dec-2011	Dave Pacheco	Support for frames with whitespace.
+# 10-Sep-2011	Brendan Gregg	Created this.
+
+use strict;
+
+use Getopt::Long;
+
+use open qw(:std :utf8);
+
+# tunables
+my $encoding;
+my $fonttype = "Verdana";
+my $imagewidth = 1200;          # max width, pixels
+my $frameheight = 16;           # max height is dynamic
+my $fontsize = 12;              # base text size
+my $fontwidth = 0.59;           # avg width relative to fontsize
+my $minwidth = 0.1;             # min function width, pixels
+my $nametype = "Function:";     # what are the names in the data?
+my $countname = "samples";      # what are the counts in the data?
+my $colors = "hot";             # color theme
+my $bgcolors = "";              # background color theme
+my $nameattrfile;               # file holding function attributes
+my $timemax;                    # (override the) sum of the counts
+my $factor = 1;                 # factor to scale counts by
+my $hash = 0;                   # color by function name
+my $palette = 0;                # if we use consistent palettes (default off)
+my %palette_map;                # palette map hash
+my $pal_file = "palette.map";   # palette map file name
+my $stackreverse = 0;           # reverse stack order, switching merge end
+my $inverted = 0;               # icicle graph
+my $flamechart = 0;             # produce a flame chart (sort by time, do not merge stacks)
+my $negate = 0;                 # switch differential hues
+my $titletext = "";             # centered heading
+my $titledefault = "Flame Graph";	# overwritten by --title
+my $titleinverted = "Icicle Graph";	#   "    "
+my $searchcolor = "rgb(230,0,230)";	# color for search highlighting
+my $notestext = "";		# embedded notes in SVG
+my $subtitletext = "";		# second level title (optional)
+my $help = 0;
+
+sub usage {	# abort with the option summary below; die() writes it to STDERR
+	die <<USAGE_END;	# NOTE: GetOptions also accepts --encoding, --fontwidth, --nameattr, --total and --factor, which are not listed here
+USAGE: $0 [options] infile > outfile.svg\n
+	--title TEXT     # change title text
+	--subtitle TEXT  # second level title (optional)
+	--width NUM      # width of image (default 1200)
+	--height NUM     # height of each frame (default 16)
+	--minwidth NUM   # omit smaller functions (default 0.1 pixels)
+	--fonttype FONT  # font type (default "Verdana")
+	--fontsize NUM   # font size (default 12)
+	--countname TEXT # count type label (default "samples")
+	--nametype TEXT  # name type label (default "Function:")
+	--colors PALETTE # set color palette. choices are: hot (default), mem,
+	                 # io, wakeup, chain, java, js, perl, red, green, blue,
+	                 # aqua, yellow, purple, orange
+	--bgcolors COLOR # set background colors. gradient choices are yellow
+	                 # (default), blue, green, grey; flat colors use "#rrggbb"
+	--hash           # colors are keyed by function name hash
+	--cp             # use consistent palette (palette.map)
+	--reverse        # generate stack-reversed flame graph
+	--inverted       # icicle graph
+	--flamechart     # produce a flame chart (sort by time, do not merge stacks)
+	--negate         # switch differential hues (blue<->red)
+	--notes TEXT     # add notes comment in SVG (for debugging)
+	--help           # this message
+
+	eg,
+	$0 --title="Flame Graph: malloc()" trace.txt > graph.svg
+USAGE_END
+}
+
+GetOptions(
+	'fonttype=s'  => \$fonttype,
+	'width=i'     => \$imagewidth,
+	'height=i'    => \$frameheight,
+	'encoding=s'  => \$encoding,
+	'fontsize=f'  => \$fontsize,
+	'fontwidth=f' => \$fontwidth,
+	'minwidth=f'  => \$minwidth,
+	'title=s'     => \$titletext,
+	'subtitle=s'  => \$subtitletext,
+	'nametype=s'  => \$nametype,
+	'countname=s' => \$countname,
+	'nameattr=s'  => \$nameattrfile,
+	'total=s'     => \$timemax,
+	'factor=f'    => \$factor,
+	'colors=s'    => \$colors,
+	'bgcolors=s'  => \$bgcolors,
+	'hash'        => \$hash,
+	'cp'          => \$palette,
+	'reverse'     => \$stackreverse,
+	'inverted'    => \$inverted,
+	'flamechart'  => \$flamechart,
+	'negate'      => \$negate,
+	'notes=s'     => \$notestext,
+	'help'        => \$help,
+) or usage();
+$help && usage();
+
+# internals
+my $ypad1 = $fontsize * 3;      # pad top, include title
+my $ypad2 = $fontsize * 2 + 10; # pad bottom, include labels
+my $ypad3 = $fontsize * 2;      # pad top, include subtitle (optional)
+my $xpad = 10;                  # pad left and right
+my $framepad = 1;		# vertical padding for frames
+my $depthmax = 0;
+my %Events;
+my %nameattr;
+
+if ($flamechart && $titletext eq "") {
+	$titletext = "Flame Chart";
+}
+
+if ($titletext eq "") {
+	unless ($inverted) {
+		$titletext = $titledefault;
+	} else {
+		$titletext = $titleinverted;
+	}
+}
+
+if ($nameattrfile) {
+	# The name-attribute file format is a function name followed by a tab then
+	# a sequence of tab separated name=value pairs.
+	open my $attrfh, '<', $nameattrfile or die "Can't read $nameattrfile: $!\n";	# 3-arg open: the name is never parsed as a mode or pipe
+	while (<$attrfh>) {
+		chomp;
+		my ($funcname, $attrstr) = split /\t/, $_, 2;
+		die "Invalid format in $nameattrfile" unless defined $attrstr;
+		$nameattr{$funcname} = { map { split /=/, $_, 2 } split /\t/, $attrstr };	# hash of attribute name => value
+	}
+}
+
+if ($notestext =~ /[<>]/) {
+	die "Notes string can't contain < or >"
+}
+
+# background colors:
+# - yellow gradient: default (hot, java, js, perl)
+# - green gradient: mem
+# - blue gradient: io, wakeup, chain
+# - gray gradient: flat colors (red, green, blue, ...)
+if ($bgcolors eq "") {
+	# choose a default
+	if ($colors eq "mem") {
+		$bgcolors = "green";
+	} elsif ($colors =~ /^(io|wakeup|chain)$/) {
+		$bgcolors = "blue";
+	} elsif ($colors =~ /^(red|green|blue|aqua|yellow|purple|orange)$/) {
+		$bgcolors = "grey";
+	} else {
+		$bgcolors = "yellow";
+	}
+}
+my ($bgcolor1, $bgcolor2);
+if ($bgcolors eq "yellow") {
+	$bgcolor1 = "#eeeeee";       # background color gradient start
+	$bgcolor2 = "#eeeeb0";       # background color gradient stop
+} elsif ($bgcolors eq "blue") {
+	$bgcolor1 = "#eeeeee"; $bgcolor2 = "#e0e0ff";
+} elsif ($bgcolors eq "green") {
+	$bgcolor1 = "#eef2ee"; $bgcolor2 = "#e0ffe0";
+} elsif ($bgcolors eq "grey") {
+	$bgcolor1 = "#f8f8f8"; $bgcolor2 = "#e8e8e8";
+} elsif ($bgcolors =~ /^#......$/) {
+	$bgcolor1 = $bgcolor2 = $bgcolors;
+} else {
+	die "Unrecognized bgcolor option \"$bgcolors\""
+}
+
+# SVG functions
+{ package SVG;	# minimal SVG writer: every method appends markup to $self->{svg}
+	sub new {	# construct an empty writer object
+		my $class = shift;
+		my $self = {};
+		bless ($self, $class);
+		return $self;
+	}
+
+	sub header {	# append the XML prolog, DOCTYPE and opening <svg> tag for a $w x $h image
+		my ($self, $w, $h) = @_;
+		my $enc_attr = '';
+		if (defined $encoding) {	# set only when --encoding was given
+			$enc_attr = qq{ encoding="$encoding"};
+		}
+		$self->{svg} .= <<SVG;
+<?xml version="1.0"$enc_attr standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg version="1.1" width="$w" height="$h" onload="init(evt)" viewBox="0 0 $w $h" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<!-- Flame graph stack visualization. See https://github.com/brendangregg/FlameGraph for latest version, and http://www.brendangregg.com/flamegraphs.html for examples. -->
+<!-- NOTES: $notestext -->
+SVG
+	}
+
+	sub include {	# append $content verbatim
+		my ($self, $content) = @_;
+		$self->{svg} .= $content;
+	}
+
+	sub colorAllocate {	# format three channel values as an "rgb(r,g,b)" string
+		my ($self, $r, $g, $b) = @_;
+		return "rgb($r,$g,$b)";
+	}
+
+	sub group_start {	# open an <a> (when $attr->{href} is set) or a <g>, then an optional <title>
+		my ($self, $attr) = @_;
+
+		my @g_attr = map {
+			exists $attr->{$_} ? sprintf(qq/$_="%s"/, $attr->{$_}) : ()
+		} qw(id class);
+		push @g_attr, $attr->{g_extra} if $attr->{g_extra};
+		if ($attr->{href}) {
+			my @a_attr;
+			push @a_attr, sprintf qq/xlink:href="%s"/, $attr->{href} if $attr->{href};
+			# default target=_top else links will open within SVG <object>
+			push @a_attr, sprintf qq/target="%s"/, $attr->{target} || "_top";
+			push @a_attr, $attr->{a_extra}                           if $attr->{a_extra};
+			$self->{svg} .= sprintf qq/<a %s>\n/, join(' ', (@a_attr, @g_attr));
+		} else {
+			$self->{svg} .= sprintf qq/<g %s>\n/, join(' ', @g_attr);
+		}
+
+		$self->{svg} .= sprintf qq/<title>%s<\/title>/, $attr->{title}
+			if $attr->{title}; # should be first element within g container
+	}
+
+	sub group_end {	# close the element opened by group_start: </a> when href is set, else </g>
+		my ($self, $attr) = @_;
+		$self->{svg} .= $attr->{href} ? qq/<\/a>\n/ : qq/<\/g>\n/;
+	}
+
+	sub filledRectangle {	# append a <rect> covering ($x1,$y1)-($x2,$y2); x and size are rounded to 1 decimal
+		my ($self, $x1, $y1, $x2, $y2, $fill, $extra) = @_;
+		$x1 = sprintf "%0.1f", $x1;
+		$x2 = sprintf "%0.1f", $x2;
+		my $w = sprintf "%0.1f", $x2 - $x1;	# width uses the already-rounded x values
+		my $h = sprintf "%0.1f", $y2 - $y1;
+		$extra = defined $extra ? $extra : "";
+		$self->{svg} .= qq/<rect x="$x1" y="$y1" width="$w" height="$h" fill="$fill" $extra \/>\n/;
+	}
+
+	sub stringTTF {	# append a <text> element; $str is inserted as-is (no escaping is done here)
+		my ($self, $id, $x, $y, $str, $extra) = @_;
+		$x = sprintf "%0.2f", $x;
+		$id =  defined $id ? qq/id="$id"/ : "";
+		$extra ||= "";
+		$self->{svg} .= qq/<text $id x="$x" y="$y" $extra>$str<\/text>\n/;
+	}
+
+	sub svg {	# return the accumulated markup followed by the closing </svg> tag
+		my $self = shift;
+		return "$self->{svg}</svg>\n";
+	}
+	1;
+}
+
+sub namehash {
+	# Generate a vector hash for the name string, weighting early over
+	# later characters. We want to pick the same colors for function
+	# names across different flame graphs.
+	my $name = shift;
+	my $vector = 0;
+	my $weight = 1;
+	my $max = 1;
+	my $mod = 10;
+	# if a "module`" prefix is present, remove it (through the first backtick)
+	$name =~ s/.(.*?)`//;
+	foreach my $c (split //, $name) {
+		my $i = (ord $c) % $mod;
+		$vector += ($i / ($mod++ - 1)) * $weight;	# modulus grows by one per character
+		$max += 1 * $weight;
+		$weight *= 0.70;	# each later character counts 30% less
+		last if $mod > 12;	# $mod starts at 10, so only the first 3 characters contribute
+	}
+	return (1 - $vector / $max)	# one minus the weighted sum scaled by $max
+}
+
+sub color {	# return an "rgb(r,g,b)" string for $name under palette $type
+	my ($type, $hash, $name) = @_;
+	my ($v1, $v2, $v3);
+
+	if ($hash) {	# shades derived from the name, so they repeat across runs
+		$v1 = namehash($name);
+		$v2 = $v3 = namehash(scalar reverse $name);
+	} else {	# otherwise shades are random
+		$v1 = rand(1);
+		$v2 = rand(1);
+		$v3 = rand(1);
+	}
+
+	# theme palettes
+	if (defined $type and $type eq "hot") {
+		my $r = 205 + int(50 * $v3);
+		my $g = 0 + int(230 * $v1);
+		my $b = 0 + int(55 * $v2);
+		return "rgb($r,$g,$b)";
+	}
+	if (defined $type and $type eq "mem") {
+		my $r = 0;
+		my $g = 190 + int(50 * $v2);
+		my $b = 0 + int(210 * $v1);
+		return "rgb($r,$g,$b)";
+	}
+	if (defined $type and $type eq "io") {
+		my $r = 80 + int(60 * $v1);
+		my $g = $r;
+		my $b = 190 + int(55 * $v2);
+		return "rgb($r,$g,$b)";
+	}
+
+	# multi palettes
+	if (defined $type and $type eq "java") {
+		# Handle both annotations (_[j], _[i], ...; which are
+		# accurate), as well as input that lacks any annotations, as
+		# best as possible. Without annotations, we get a little hacky
+		# and match on java|org|com, etc.
+		if ($name =~ m:_\[j\]$:) {	# jit annotation
+			$type = "green";
+		} elsif ($name =~ m:_\[i\]$:) {	# inline annotation
+			$type = "aqua";
+		} elsif ($name =~ m:^L?(java|javax|jdk|net|org|com|io|sun)/:) {	# Java
+			$type = "green";
+		} elsif ($name =~ /:::/) {      # Java, typical perf-map-agent method separator
+			$type = "green";	              
+		} elsif ($name =~ /::/) {	# C++
+			$type = "yellow";
+		} elsif ($name =~ m:_\[k\]$:) {	# kernel annotation
+			$type = "orange";
+		} elsif ($name =~ /::/) {	# C++ (unreachable: the same test already matched above)
+			$type = "yellow";
+		} else {			# system
+			$type = "red";
+		}
+		# fall-through to color palettes
+	}
+	if (defined $type and $type eq "perl") {
+		if ($name =~ /::/) {		# C++
+			$type = "yellow";
+		} elsif ($name =~ m:Perl: or $name =~ m:\.pl:) {	# Perl
+			$type = "green";
+		} elsif ($name =~ m:_\[k\]$:) {	# kernel
+			$type = "orange";
+		} else {			# system
+			$type = "red";
+		}
+		# fall-through to color palettes
+	}
+	if (defined $type and $type eq "js") {
+		# Handle both annotations (_[j], _[i], ...; which are
+		# accurate), as well as input that lacks any annotations, as
+		# best as possible. Without annotations, we get a little hacky,
+		# and match on a "/" with a ".js", etc.
+		if ($name =~ m:_\[j\]$:) {	# jit annotation
+			if ($name =~ m:/:) {
+				$type = "green";	# source
+			} else {
+				$type = "aqua";		# builtin
+			}
+		} elsif ($name =~ /::/) {	# C++
+			$type = "yellow";
+		} elsif ($name =~ m:/.*\.js:) {	# JavaScript (match "/" in path)
+			$type = "green";
+		} elsif ($name =~ m/:/) {	# JavaScript (match ":" in builtin)
+			$type = "aqua";
+		} elsif ($name =~ m/^ $/) {	# Missing symbol
+			$type = "green";
+		} elsif ($name =~ m:_\[k\]:) {	# kernel
+			$type = "orange";
+		} else {			# system
+			$type = "red";
+		}
+		# fall-through to color palettes
+	}
+	if (defined $type and $type eq "wakeup") {
+		$type = "aqua";
+		# fall-through to color palettes
+	}
+	if (defined $type and $type eq "chain") {
+		if ($name =~ m:_\[w\]:) {	# waker
+			$type = "aqua"
+		} else {			# off-CPU
+			$type = "blue";
+		}
+		# fall-through to color palettes
+	}
+
+	# color palettes
+	if (defined $type and $type eq "red") {
+		my $r = 200 + int(55 * $v1);
+		my $x = 50 + int(80 * $v1);
+		return "rgb($r,$x,$x)";
+	}
+	if (defined $type and $type eq "green") {
+		my $g = 200 + int(55 * $v1);
+		my $x = 50 + int(60 * $v1);
+		return "rgb($x,$g,$x)";
+	}
+	if (defined $type and $type eq "blue") {
+		my $b = 205 + int(50 * $v1);
+		my $x = 80 + int(60 * $v1);
+		return "rgb($x,$x,$b)";
+	}
+	if (defined $type and $type eq "yellow") {
+		my $x = 175 + int(55 * $v1);
+		my $b = 50 + int(20 * $v1);
+		return "rgb($x,$x,$b)";
+	}
+	if (defined $type and $type eq "purple") {
+		my $x = 190 + int(65 * $v1);
+		my $g = 80 + int(60 * $v1);
+		return "rgb($x,$g,$x)";
+	}
+	if (defined $type and $type eq "aqua") {
+		my $r = 50 + int(60 * $v1);
+		my $g = 165 + int(55 * $v1);
+		my $b = 165 + int(55 * $v1);
+		return "rgb($r,$g,$b)";
+	}
+	if (defined $type and $type eq "orange") {
+		my $r = 190 + int(65 * $v1);
+		my $g = 90 + int(65 * $v1);
+		return "rgb($r,$g,0)";
+	}
+
+	return "rgb(0,0,0)";	# no palette matched (or $type undefined): black
+}
+
+sub color_scale {
+	# Color for a differential value: white when unchanged, shading
+	# toward red for positive and toward blue for negative values.
+	# The --negate option ($negate) swaps the two hues.
+	my ($value, $max) = @_;
+	$value = -$value if $negate;
+	return "rgb(255,255,255)" if $value == 0;
+	# the two non-dominant channels drop from 210 toward 0 as |value| nears $max
+	my $fade = int(210 * ($max - abs($value)) / $max);
+	return $value > 0 ? "rgb(255,$fade,$fade)" : "rgb($fade,$fade,255)";
+}
+
+sub color_map {
+	# Return the color remembered for $fname in %palette_map, choosing one first if needed.
+	my ($theme, $fname) = @_;
+	unless (exists $palette_map{$fname}) {
+		# first sighting of this function: pick a color and remember it
+		$palette_map{$fname} = color($theme, $hash, $fname);
+	}
+	return $palette_map{$fname};
+}
+
+sub write_palette {	# save %palette_map to $pal_file as sorted "function->color" lines
+	open(my $fh, '>', $pal_file) or do { warn "can't write file $pal_file: $!\n"; return };	# best effort: report once, keep going
+	foreach my $key (sort keys %palette_map) {
+		print {$fh} $key."->".$palette_map{$key}."\n";
+	}
+	close($fh) or warn "can't close file $pal_file: $!\n";	# buffered write errors surface here
+}
+
+sub read_palette {	# load "function->color" lines from $pal_file into %palette_map
+	return unless -e $pal_file;	# no saved palette yet: nothing to load
+	open(my $fh, '<', $pal_file) or die "can't open file $pal_file: $!";
+	while (my $line = <$fh>) {
+		chomp($line);
+		# split at the LAST "->": names may contain one (eg, C++ operator->); skip malformed lines
+		my ($key, $value) = $line =~ /^(.*)->(.*)$/ or next;
+		$palette_map{$key} = $value;
+	}
+	close($fh);
+}
+
+my %Node;	# Hash of merged frame data
+my %Tmp;
+
+# flow() merges two stacks, storing the merged frames and value data in %Node.
+sub flow {	# $last, $this: array refs of frame names; $v: current time offset; $d: optional delta
+	my ($last, $this, $v, $d) = @_;
+
+	my $len_a = @$last - 1;	# index of the last frame in each stack
+	my $len_b = @$this - 1;
+
+	my $i = 0;
+	my $len_same;
+	for (; $i <= $len_a; $i++) {	# count the leading frames the two stacks share
+		last if $i > $len_b;
+		last if $last->[$i] ne $this->[$i];
+	}
+	$len_same = $i;
+
+	for ($i = $len_a; $i >= $len_same; $i--) {	# frames of $last past the shared prefix end at $v
+		my $k = "$last->[$i];$i";
+		# a unique ID is constructed from "func;depth;etime";
+		# func-depth isn't unique, it may be repeated later.
+		$Node{"$k;$v"}->{stime} = delete $Tmp{$k}->{stime};
+		if (defined $Tmp{$k}->{delta}) {
+			$Node{"$k;$v"}->{delta} = delete $Tmp{$k}->{delta};
+		}
+		delete $Tmp{$k};
+	}
+
+	for ($i = $len_same; $i <= $len_b; $i++) {	# frames of $this past the shared prefix start at $v
+		my $k = "$this->[$i];$i";
+		$Tmp{$k}->{stime} = $v;
+		if (defined $d) {
+			$Tmp{$k}->{delta} += $i == $len_b ? $d : 0;	# only the last frame of $this receives $d
+		}
+	}
+
+        return $this;	# the caller stores this as the next $last
+}
+
+# parse input
+my @Data;
+my @SortedData;
+my $last = [];
+my $time = 0;
+my $delta = undef;
+my $ignored = 0;
+my $line;
+my $maxdelta = 1;
+
+# reverse if needed
+foreach (<>) {
+	chomp;
+	$line = $_;
+	if ($stackreverse) {
+		# there may be an extra samples column for differentials
+		# XXX todo: redo these REs as one. It's repeated below.
+		my($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
+		my $samples2 = undef;
+		if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) {
+			$samples2 = $samples;
+			($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
+			unshift @Data, join(";", reverse split(";", $stack)) . " $samples $samples2";
+		} else {
+			unshift @Data, join(";", reverse split(";", $stack)) . " $samples";
+		}
+	} else {
+		unshift @Data, $line;
+	}
+}
+
+if ($flamechart) {
+	# In flame chart mode, just reverse the data so time moves from left to right.
+	@SortedData = reverse @Data;
+} else {
+	@SortedData = sort @Data;
+}
+
+# process and merge frames
+foreach (@SortedData) {
+	chomp;
+	# process: folded_stack count
+	# eg: func_a;func_b;func_c 31
+	my ($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
+	unless (defined $samples and defined $stack) {
+		++$ignored;
+		next;
+	}
+
+	# there may be an extra samples column for differentials:
+	my $samples2 = undef;
+	if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) {
+		$samples2 = $samples;
+		($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/);
+	}
+	$delta = undef;
+	if (defined $samples2) {
+		$delta = $samples2 - $samples;
+		$maxdelta = abs($delta) if abs($delta) > $maxdelta;
+	}
+
+	# for chain graphs, annotate waker frames with "_[w]", for later
+	# coloring. This is a hack, but has a precedent ("_[k]" from perf).
+	if ($colors eq "chain") {
+		my @parts = split ";--;", $stack;
+		my @newparts = ();
+		$stack = shift @parts;
+		$stack .= ";--;";
+		foreach my $part (@parts) {
+			$part =~ s/;/_[w];/g;
+			$part .= "_[w]";
+			push @newparts, $part;
+		}
+		$stack .= join ";--;", @parts;
+	}
+
+	# merge frames and populate %Node:
+	$last = flow($last, [ '', split ";", $stack ], $time, $delta);
+
+	if (defined $samples2) {
+		$time += $samples2;
+	} else {
+		$time += $samples;
+	}
+}
+flow($last, [], $time, $delta);
+
+warn "Ignored $ignored lines with invalid format\n" if $ignored;
+unless ($time) {
+	warn "ERROR: No stack counts found\n";
+	my $im = SVG->new();
+	# emit an error message SVG, for tools automating flamegraph use
+	my $imageheight = $fontsize * 5;
+	$im->header($imagewidth, $imageheight);
+	$im->stringTTF(undef, int($imagewidth / 2), $fontsize * 2,
+	    "ERROR: No valid input provided to flamegraph.pl.");
+	print $im->svg;
+	exit 2;
+}
+if ($timemax and $timemax < $time) {
+	warn "Specified --total $timemax is less than actual total $time, so ignored\n"
+	if $timemax/$time > 0.02; # only warn if significant (e.g., not rounding etc)
+	undef $timemax;
+}
+$timemax ||= $time;
+
+my $widthpertime = ($imagewidth - 2 * $xpad) / $timemax;
+my $minwidth_time = $minwidth / $widthpertime;
+
+# prune blocks that are too narrow and determine max depth
+while (my ($id, $node) = each %Node) {
+	my ($func, $depth, $etime) = split ";", $id;
+	my $stime = $node->{stime};
+	die "missing start for $id" if not defined $stime;
+
+	if (($etime-$stime) < $minwidth_time) {
+		delete $Node{$id};
+		next;
+	}
+	$depthmax = $depth if $depth > $depthmax;
+}
+
+# draw canvas, and embed interactive JavaScript program
+my $imageheight = (($depthmax + 1) * $frameheight) + $ypad1 + $ypad2;
+$imageheight += $ypad3 if $subtitletext ne "";
+my $titlesize = $fontsize + 5;
+my $im = SVG->new();
+my ($black, $vdgrey, $dgrey) = (
+	$im->colorAllocate(0, 0, 0),
+	$im->colorAllocate(160, 160, 160),
+	$im->colorAllocate(200, 200, 200),
+    );
+$im->header($imagewidth, $imageheight);
+my $inc = <<INC;
+<defs>
+	<linearGradient id="background" y1="0" y2="1" x1="0" x2="0" >
+		<stop stop-color="$bgcolor1" offset="5%" />
+		<stop stop-color="$bgcolor2" offset="95%" />
+	</linearGradient>
+</defs>
+<style type="text/css">
+	text { font-family:$fonttype; font-size:${fontsize}px; fill:$black; }
+	#search, #ignorecase { opacity:0.1; cursor:pointer; }
+	#search:hover, #search.show, #ignorecase:hover, #ignorecase.show { opacity:1; }
+	#subtitle { text-anchor:middle; font-color:$vdgrey; }
+	#title { text-anchor:middle; font-size:${titlesize}px}
+	#unzoom { cursor:pointer; }
+	#frames > *:hover { stroke:black; stroke-width:0.5; cursor:pointer; }
+	.hide { display:none; }
+	.parent { opacity:0.5; }
+</style>
+<script type="text/ecmascript">
+<![CDATA[
+	"use strict";
+	var details, searchbtn, unzoombtn, matchedtxt, svg, searching, currentSearchTerm, ignorecase, ignorecaseBtn;
+	function init(evt) {
+		details = document.getElementById("details").firstChild;
+		searchbtn = document.getElementById("search");
+		ignorecaseBtn = document.getElementById("ignorecase");
+		unzoombtn = document.getElementById("unzoom");
+		matchedtxt = document.getElementById("matched");
+		svg = document.getElementsByTagName("svg")[0];
+		searching = 0;
+		currentSearchTerm = null;
+
+		// use GET parameters to restore a flamegraphs state.
+		var params = get_params();
+		if (params.x && params.y)
+			zoom(find_group(document.querySelector('[x="' + params.x + '"][y="' + params.y + '"]')));
+                if (params.s) search(params.s);
+	}
+
+	// event listeners
+	window.addEventListener("click", function(e) {
+		var target = find_group(e.target);
+		if (target) {
+			if (target.nodeName == "a") {
+				if (e.ctrlKey === false) return;
+				e.preventDefault();
+			}
+			if (target.classList.contains("parent")) unzoom();
+			zoom(target);
+			if (!document.querySelector('.parent')) {
+				clearzoom();
+				return;
+			}
+
+			// set parameters for zoom state
+			var el = target.querySelector("rect");
+			if (el && el.attributes && el.attributes.y && el.attributes._orig_x) {
+				var params = get_params()
+				params.x = el.attributes._orig_x.value;
+				params.y = el.attributes.y.value;
+				history.replaceState(null, null, parse_params(params));
+			}
+		}
+		else if (e.target.id == "unzoom") clearzoom();
+		else if (e.target.id == "search") search_prompt();
+		else if (e.target.id == "ignorecase") toggle_ignorecase();
+	}, false)
+
+	// mouse-over for info
+	// show
+	window.addEventListener("mouseover", function(e) {
+		var target = find_group(e.target);
+		if (target) details.nodeValue = "$nametype " + g_to_text(target);
+	}, false)
+
+	// clear
+	window.addEventListener("mouseout", function(e) {
+		var target = find_group(e.target);
+		if (target) details.nodeValue = ' ';
+	}, false)
+
+	// ctrl-F for search
+	// ctrl-I to toggle case-sensitive search
+	window.addEventListener("keydown",function (e) {
+		if (e.keyCode === 114 || (e.ctrlKey && e.keyCode === 70)) {
+			e.preventDefault();
+			search_prompt();
+		}
+		else if (e.ctrlKey && e.keyCode === 73) {
+			e.preventDefault();
+			toggle_ignorecase();
+		}
+	}, false)
+
+	// functions
+	function get_params() {
+		var params = {};
+		var paramsarr = window.location.search.substr(1).split('&');
+		for (var i = 0; i < paramsarr.length; ++i) {
+			var tmp = paramsarr[i].split("=");
+			if (!tmp[0] || !tmp[1]) continue;
+			params[tmp[0]]  = decodeURIComponent(tmp[1]);
+		}
+		return params;
+	}
+	function parse_params(params) {
+		var uri = "?";
+		for (var key in params) {
+			uri += key + '=' + encodeURIComponent(params[key]) + '&';
+		}
+		if (uri.slice(-1) == "&")
+			uri = uri.substring(0, uri.length - 1);
+		if (uri == '?')
+			uri = window.location.href.split('?')[0];
+		return uri;
+	}
+	function find_child(node, selector) {
+		var children = node.querySelectorAll(selector);
+		if (children.length) return children[0];
+	}
+	function find_group(node) {
+		var parent = node.parentElement;
+		if (!parent) return;
+		if (parent.id == "frames") return node;
+		return find_group(parent);
+	}
+	function orig_save(e, attr, val) {
+		if (e.attributes["_orig_" + attr] != undefined) return;
+		if (e.attributes[attr] == undefined) return;
+		if (val == undefined) val = e.attributes[attr].value;
+		e.setAttribute("_orig_" + attr, val);
+	}
+	function orig_load(e, attr) {
+		if (e.attributes["_orig_"+attr] == undefined) return;
+		e.attributes[attr].value = e.attributes["_orig_" + attr].value;
+		e.removeAttribute("_orig_"+attr);
+	}
+	function g_to_text(e) {
+		var text = find_child(e, "title").firstChild.nodeValue;
+		return (text)
+	}
+	function g_to_func(e) {
+		var func = g_to_text(e);
+		// if there's any manipulation we want to do to the function
+		// name before it's searched, do it here before returning.
+		return (func);
+	}
+	function update_text(e) { // re-fit the label of frame group e to the current width of its rect
+		var r = find_child(e, "rect");
+		var t = find_child(e, "text");
+		var w = parseFloat(r.attributes.width.value) -3; // room left after the label inset of 3
+		var txt = find_child(e, "title").textContent.replace(/\\([^(]*\\)\$/,""); // title minus its trailing "(...)" part
+		t.attributes.x.value = parseFloat(r.attributes.x.value) + 3;
+
+		// Smaller than this size won't fit anything
+		if (w < 2 * $fontsize * $fontwidth) {
+			t.textContent = "";
+			return;
+		}
+
+		t.textContent = txt;
+		// Fit in full text width
+		if (/^ *\$/.test(txt) || t.getSubStringLength(0, txt.length) < w)
+			return;
+
+		// Truncate: take the largest x whose first x + 2 characters fit, shown as prefix + ".."
+		for (var x = txt.length - 2; x > 0; x--) {
+			if (t.getSubStringLength(0, x + 2) <= w) {
+				t.textContent = txt.substring(0, x) + "..";
+				return;
+			}
+		}
+		t.textContent = "";
+	}
+
+	// zoom
+	function zoom_reset(e) { // restore the saved x and width on e and on all of its descendants
+		if (e.attributes != undefined) {
+			orig_load(e, "x");
+			orig_load(e, "width");
+		}
+		if (e.childNodes == undefined) return;
+		for (var i = 0, c = e.childNodes; i < c.length; i++) {
+			zoom_reset(c[i]);
+		}
+	}
+	function zoom_child(e, x, ratio) { // rescale x and width of e and its descendants by ratio, relative to x
+		if (e.attributes != undefined) {
+			if (e.attributes.x != undefined) {
+				orig_save(e, "x");
+				e.attributes.x.value = (parseFloat(e.attributes.x.value) - x - $xpad) * ratio + $xpad;
+				if (e.tagName == "text") // label sits 3 past its rect; parse first, as "+" on a string concatenates
+					e.attributes.x.value = parseFloat(find_child(e.parentNode, "rect[x]").attributes.x.value) + 3;
+			}
+			if (e.attributes.width != undefined) {
+				orig_save(e, "width");
+				e.attributes.width.value = parseFloat(e.attributes.width.value) * ratio;
+			}
+		}
+
+		if (e.childNodes == undefined) return;
+		for (var i = 0, c = e.childNodes; i < c.length; i++) {
+			zoom_child(c[i], x - $xpad, ratio);
+		}
+	}
+	function zoom_parent(e) { // stretch e and its descendants across the full graph width
+		if (e.attributes) {
+			if (e.attributes.x != undefined) {
+				orig_save(e, "x");
+				e.attributes.x.value = $xpad;
+			}
+			if (e.attributes.width != undefined) {
+				orig_save(e, "width");
+				e.attributes.width.value = parseInt(svg.width.baseVal.value) - ($xpad * 2);
+			}
+		}
+		if (e.childNodes == undefined) return;
+		for (var i = 0, c = e.childNodes; i < c.length; i++) {
+			zoom_parent(c[i]);
+		}
+	}
+	function zoom(node) { // zoom so that the frame group node fills the graph width
+		var attr = find_child(node, "rect").attributes;
+		var width = parseFloat(attr.width.value);
+		var xmin = parseFloat(attr.x.value);
+		var xmax = parseFloat(xmin + width);
+		var ymin = parseFloat(attr.y.value);
+		var ratio = (svg.width.baseVal.value - 2 * $xpad) / width; // horizontal scale applied to the zoomed subtree
+
+		// XXX: Workaround for JavaScript float issues (fix me)
+		var fudge = 0.0001;
+
+		unzoombtn.classList.remove("hide");
+
+		var el = document.getElementById("frames").children;
+		for (var i = 0; i < el.length; i++) {
+			var e = el[i];
+			var a = find_child(e, "rect").attributes;
+			var ex = parseFloat(a.x.value);
+			var ew = parseFloat(a.width.value);
+			var upstack;
+			// Is it an ancestor
+			if ($inverted == 0) {
+				upstack = parseFloat(a.y.value) > ymin;
+			} else {
+				upstack = parseFloat(a.y.value) < ymin;
+			}
+			if (upstack) {
+				// Direct ancestor
+				if (ex <= xmin && (ex+ew+fudge) >= xmax) {
+					e.classList.add("parent");
+					zoom_parent(e);
+					update_text(e);
+				}
+				// not in current path
+				else
+					e.classList.add("hide");
+			}
+			// Children maybe
+			else {
+				// no common path
+				if (ex < xmin || ex + fudge >= xmax) {
+					e.classList.add("hide");
+				}
+				else {
+					zoom_child(e, xmin, ratio);
+					update_text(e);
+				}
+			}
+		}
+		search(); // re-run search with the current term
+	}
+	function unzoom() { // show every frame again with its original x and width
+		unzoombtn.classList.add("hide");
+		var el = document.getElementById("frames").children;
+		for(var i = 0; i < el.length; i++) {
+			el[i].classList.remove("parent");
+			el[i].classList.remove("hide");
+			zoom_reset(el[i]);
+			update_text(el[i]);
+		}
+		search(); // re-run search with the current term
+	}
+	function clearzoom() { // unzoom and drop the x/y zoom parameters from the URL
+		unzoom();
+
+		// remove zoom state
+		var params = get_params();
+		if (params.x) delete params.x;
+		if (params.y) delete params.y;
+		history.replaceState(null, null, parse_params(params));
+	}
+
+	// search
+	function toggle_ignorecase() { // flip case sensitivity, update the button, then redo the search
+		ignorecase = !ignorecase;
+		if (ignorecase) {
+			ignorecaseBtn.classList.add("show");
+		} else {
+			ignorecaseBtn.classList.remove("show");
+		}
+		reset_search();
+		search();
+	}
+	function reset_search() { // restore the original rect fills and remove "s" from the URL parameters
+		var el = document.querySelectorAll("#frames rect");
+		for (var i = 0; i < el.length; i++) {
+			orig_load(el[i], "fill")
+		}
+		var params = get_params();
+		delete params.s;
+		history.replaceState(null, null, parse_params(params));
+	}
+	function search_prompt() { // prompt for a new term, or clear the active search if one is showing
+		if (!searching) {
+			var term = prompt("Enter a search term (regexp " +
+			    "allowed, eg: ^ext4_)"
+			    + (ignorecase ? ", ignoring case" : "")
+			    + "\\nPress Ctrl-i to toggle case sensitivity", "");
+			if (term != null) search(term);
+		} else {
+			reset_search();
+			searching = 0;
+			currentSearchTerm = null;
+			searchbtn.classList.remove("show");
+			searchbtn.firstChild.nodeValue = "Search"
+			matchedtxt.classList.add("hide");
+			matchedtxt.firstChild.nodeValue = ""
+		}
+	}
+	function search(term) {
+		if (term) currentSearchTerm = term;
+
+		var re = new RegExp(currentSearchTerm, ignorecase ? 'i' : '');
+		var el = document.getElementById("frames").children;
+		var matches = new Object();
+		var maxwidth = 0;
+		for (var i = 0; i < el.length; i++) {
+			var e = el[i];
+			var func = g_to_func(e);
+			var rect = find_child(e, "rect");
+			if (func == null || rect == null)
+				continue;
+
+			// Save max width. Only works as we have a root frame
+			var w = parseFloat(rect.attributes.width.value);
+			if (w > maxwidth)
+				maxwidth = w;
+
+			if (func.match(re)) {
+				// highlight
+				var x = parseFloat(rect.attributes.x.value);
+				orig_save(rect, "fill");
+				rect.attributes.fill.value = "$searchcolor";
+
+				// remember matches
+				if (matches[x] == undefined) {
+					matches[x] = w;
+				} else {
+					if (w > matches[x]) {
+						// overwrite with parent
+						matches[x] = w;
+					}
+				}
+				searching = 1;
+			}
+		}
+		if (!searching)
+			return;
+		var params = get_params();
+		params.s = currentSearchTerm;
+		history.replaceState(null, null, parse_params(params));
+
+		searchbtn.classList.add("show");
+		searchbtn.firstChild.nodeValue = "Reset Search";
+
+		// calculate percent matched, excluding vertical overlap
+		var count = 0;
+		var lastx = -1;
+		var lastw = 0;
+		var keys = Array();
+		for (k in matches) {
+			if (matches.hasOwnProperty(k))
+				keys.push(k);
+		}
+		// sort the matched frames by their x location
+		// ascending, then width descending
+		keys.sort(function(a, b){
+			return a - b;
+		});
+		// Step through frames saving only the biggest bottom-up frames
+		// thanks to the sort order. This relies on the tree property
+		// where children are always smaller than their parents.
+		var fudge = 0.0001;	// JavaScript floating point
+		for (var k in keys) {
+			var x = parseFloat(keys[k]);
+			var w = matches[keys[k]];
+			if (x >= lastx + lastw - fudge) {
+				count += w;
+				lastx = x;
+				lastw = w;
+			}
+		}
+		// display matched percent
+		matchedtxt.classList.remove("hide");
+		var pct = 100 * count / maxwidth;
+		if (pct != 100) pct = pct.toFixed(1)
+		matchedtxt.firstChild.nodeValue = "Matched: " + pct + "%";
+	}
+]]>
+</script>
+INC
+$im->include($inc);	# add the $inc block (the heredoc ending with the script above) to the image
+$im->filledRectangle(0, 0, $imagewidth, $imageheight, 'url(#background)');	# full-canvas background
+$im->stringTTF("title", int($imagewidth / 2), $fontsize * 2, $titletext);	# first argument looks like an element id -- TODO confirm
+$im->stringTTF("subtitle", int($imagewidth / 2), $fontsize * 4, $subtitletext) if $subtitletext ne "";
+$im->stringTTF("details", $xpad, $imageheight - ($ypad2 / 2), " ");
+$im->stringTTF("unzoom", $xpad, $fontsize * 2, "Reset Zoom", 'class="hide"');	# starts hidden; zoom() removes "hide"
+$im->stringTTF("search", $imagewidth - $xpad - 100, $fontsize * 2, "Search");
+$im->stringTTF("ignorecase", $imagewidth - $xpad - 16, $fontsize * 2, "ic");
+$im->stringTTF("matched", $imagewidth - $xpad - 100, $imageheight - ($ypad2 / 2), " ");
+
+if ($palette) {	# palette mode: presumably loads a saved colour map before frames are coloured
+	read_palette();
+}
+
+# draw frames
+$im->group_start({id => "frames"});	# container the embedded script looks up by the id "frames"
+while (my ($id, $node) = each %Node) {
+	my ($func, $depth, $etime) = split ";", $id;	# node key is "func;depth;etime"
+	my $stime = $node->{stime};
+	my $delta = $node->{delta};
+
+	$etime = $timemax if $func eq "" and $depth == 0;	# the unnamed depth-0 frame ("all") spans the whole graph
+
+	my $x1 = $xpad + $stime * $widthpertime;
+	my $x2 = $xpad + $etime * $widthpertime;
+	my ($y1, $y2);
+	unless ($inverted) {	# normal layout: greater depth gives a smaller y
+		$y1 = $imageheight - $ypad2 - ($depth + 1) * $frameheight + $framepad;
+		$y2 = $imageheight - $ypad2 - $depth * $frameheight;
+	} else {	# inverted layout: greater depth gives a larger y
+		$y1 = $ypad1 + $depth * $frameheight;
+		$y2 = $ypad1 + ($depth + 1) * $frameheight - $framepad;
+	}
+
+	my $samples = sprintf "%.0f", ($etime - $stime) * $factor;
+	(my $samples_txt = $samples) # add commas per perlfaq5
+		=~ s/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/$1,/g;
+
+	my $info;	# tooltip text: "name (samples countname, pct%)"
+	if ($func eq "" and $depth == 0) {
+		$info = "all ($samples_txt $countname, 100%)";
+	} else {
+		my $pct = sprintf "%.2f", ((100 * $samples) / ($timemax * $factor));
+		my $escaped_func = $func;
+		# clean up SVG breaking characters:
+		$escaped_func =~ s/&/&amp;/g;
+		$escaped_func =~ s/</&lt;/g;
+		$escaped_func =~ s/>/&gt;/g;
+		$escaped_func =~ s/"/&quot;/g;
+		$escaped_func =~ s/_\[[kwij]\]$//;	# strip any annotation
+		unless (defined $delta) {
+			$info = "$escaped_func ($samples_txt $countname, $pct%)";
+		} else {
+			my $d = $negate ? -$delta : $delta;
+			my $deltapct = sprintf "%.2f", ((100 * $d) / ($timemax * $factor));
+			$deltapct = $d > 0 ? "+$deltapct" : $deltapct;
+			$info = "$escaped_func ($samples_txt $countname, $pct%; $deltapct%)";
+		}
+	}
+
+	my $nameattr = { %{ $nameattr{$func}||{} } }; # shallow clone
+	$nameattr->{title}       ||= $info;	# keep a title supplied via %nameattr, else use $info
+	$im->group_start($nameattr);
+
+	my $color;	# first matching rule wins
+	if ($func eq "--") {
+		$color = $vdgrey;
+	} elsif ($func eq "-") {
+		$color = $dgrey;
+	} elsif (defined $delta) {
+		$color = color_scale($delta, $maxdelta);
+	} elsif ($palette) {
+		$color = color_map($colors, $func);
+	} else {
+		$color = color($colors, $hash, $func);
+	}
+	$im->filledRectangle($x1, $y1, $x2, $y2, $color, 'rx="2" ry="2"');
+
+	my $chars = int( ($x2 - $x1) / ($fontsize * $fontwidth));	# estimated label capacity in characters
+	my $text = "";
+	if ($chars >= 3) { # room for one char plus two dots
+		$func =~ s/_\[[kwij]\]$//;	# strip any annotation
+		$text = substr $func, 0, $chars;
+		substr($text, -2, 2) = ".." if $chars < length $func;
+		$text =~ s/&/&amp;/g;	# escape after truncating so an entity is never cut in half
+		$text =~ s/</&lt;/g;
+		$text =~ s/>/&gt;/g;
+	}
+	$im->stringTTF(undef, $x1 + 3, 3 + ($y1 + $y2) / 2, $text);
+
+	$im->group_end($nameattr);
+}
+$im->group_end();
+
+print $im->svg;	# print the finished SVG document
+
+if ($palette) {	# palette mode: presumably persists the colour map for later runs -- see write_palette
+	write_palette();
+}
+
+# vim: ts=8 sts=8 sw=8 noexpandtab
diff --git a/tools/generate-flamegraph.sh b/tools/generate-flamegraph.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7953b9db6f1e3344cffaa14de1610375f59c6bbb
--- /dev/null
+++ b/tools/generate-flamegraph.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+# Copyright 2022 Florian Schmaus, Florian Fischer
+set -euo pipefail
+# Usage: generate-flamegraph.sh PERF-DATA -- writes <name>-flamegraph.svg next to PERF-DATA.
+if [[ -z "${1:-}" ]] || [[ ! -f "$1" ]]; then	# ${1:-}: keep the usage text reachable under "set -u"
+	echo "usage: ${0##*/} PERF-DATA" >&2
+	exit 2
+fi
+
+PERF=${PERF:-perf}
+PERF_DATA="$(realpath --no-symlinks -- "$1")"	# absolute: perf runs after we pushd into a temp dir
+
+# Pretty fancy method to reliably get the absolute path of a shell
+# script, *even if it is sourced*. Credits go to GreenFox on
+# stackoverflow: http://stackoverflow.com/a/12197518/194894
+pushd . > /dev/null
+SCRIPTDIR="${BASH_SOURCE[0]}";
+while [ -h "${SCRIPTDIR}" ]; do
+	cd "$(dirname "${SCRIPTDIR}")"
+	SCRIPTDIR="$(readlink "$(basename "${SCRIPTDIR}")")";
+done
+cd "$(dirname "${SCRIPTDIR}")" > /dev/null
+SCRIPTDIR="$(pwd)";
+popd > /dev/null
+
+DATE=$(date -Iminutes)
+TMPDIR=$(mktemp --tmpdir=/var/tmp --directory "flamegraph-${DATE}-XXXXX")	# intermediate files stay here afterwards
+pushd "${TMPDIR}" > /dev/null
+
+${PERF} script -i "${PERF_DATA}" > out.perf
+"${SCRIPTDIR}"/stackcollapse-perf.pl out.perf > out.folded
+sed -e 's/^EMPER_Wrkr_#[0-9]\+;//' out.folded > out.folded.filtered	# drop the leading per-worker "EMPER_Wrkr_#N" frame
+"${SCRIPTDIR}"/flamegraph.pl out.folded.filtered > flame.svg
+
+popd >/dev/null
+
+FLAMEGRAPH="$(dirname "${PERF_DATA}")"/"$(basename "${PERF_DATA}" .data)"-flamegraph.svg
+cp "${TMPDIR}/"flame.svg "${FLAMEGRAPH}"
diff --git a/tools/stackcollapse-perf.pl b/tools/stackcollapse-perf.pl
new file mode 100755
index 0000000000000000000000000000000000000000..fd3c78e28196f0267ed1f3b5b2d5551e6dc73e27
--- /dev/null
+++ b/tools/stackcollapse-perf.pl
@@ -0,0 +1,430 @@
+#!/usr/bin/perl -w
+#
+# stackcollapse-perf.pl	collapse perf samples into single lines.
+#
+# Parses a list of multiline stacks generated by "perf script", and
+# outputs a semicolon separated stack followed by a space and a count.
+# If memory addresses (+0xd) are present, they are stripped, and resulting
+# identical stacks are coalesced with their counts summed.
+#
+# USAGE: ./stackcollapse-perf.pl [options] infile > outfile
+#
+# Run "./stackcollapse-perf.pl -h" to list options.
+#
+# Example input:
+#
+#  swapper     0 [000] 158665.570607: cpu-clock:
+#         ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
+#         ffffffff8101c6a3 default_idle ([kernel.kallsyms])
+#         ffffffff81013236 cpu_idle ([kernel.kallsyms])
+#         ffffffff815bf03e rest_init ([kernel.kallsyms])
+#         ffffffff81aebbfe start_kernel ([kernel.kallsyms].init.text)
+#  [...]
+#
+# Example output:
+#
+#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 1
+#
+# Input may be created and processed using:
+#
+#  perf record -a -g -F 997 sleep 60
+#  perf script | ./stackcollapse-perf.pl > out.stacks-folded
+#
+# The output of "perf script" should include stack traces. If these are missing
+# for you, try manually selecting the perf script output; eg:
+#
+#  perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace | ...
+#
+# This is also required for the --pid or --tid options, so that the output has
+# both the PID and TID.
+#
+# Copyright 2012 Joyent, Inc.  All rights reserved.
+# Copyright 2012 Brendan Gregg.  All rights reserved.
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at docs/cddl1.txt or
+# http://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at docs/cddl1.txt.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# 02-Mar-2012	Brendan Gregg	Created this.
+# 02-Jul-2014	   "	  "	Added process name to stacks.
+
+use strict;
+use Getopt::Long;
+
+my %collapsed;
+
+sub remember_stack {
+	# Accumulate a sample count ($_[1]) under its folded stack string ($_[0]).
+	$collapsed{$_[0]} += $_[1];
+}
+my $annotate_kernel = 0; # put an annotation on kernel function
+my $annotate_jit = 0;   # put an annotation on jit symbols
+my $annotate_all = 0;   # enable all annotations
+my $include_pname = 1;	# include process names in stacks
+my $include_pid = 0;	# include process ID with process name
+my $include_tid = 0;	# include process & thread ID with process name
+my $include_addrs = 0;	# include raw address where a symbol can't be found
+my $tidy_java = 1;	# condense Java signatures
+my $tidy_generic = 1;	# clean up function names a little
+my $target_pname;	# target process name from perf invocation
+my $event_filter = "";    # event type filter, defaults to first encountered event
+my $event_defaulted = 0;  # whether we defaulted to an event (none provided)
+my $event_warning = 0;	  # if we printed a warning for the event
+
+my $show_inline = 0;	# --inline: un-inline frames using addr2line
+my $show_context = 0;	# --context: add source context to --inline frames
+
+my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline)
+GetOptions('inline' => \$show_inline,	# on a failed parse, die with the usage text below
+           'context' => \$show_context,
+           'srcline' => \$srcline_in_input,
+           'pid' => \$include_pid,
+           'kernel' => \$annotate_kernel,
+           'jit' => \$annotate_jit,
+           'all' => \$annotate_all,
+           'tid' => \$include_tid,
+           'addrs' => \$include_addrs,
+           'event-filter=s' => \$event_filter)
+or die <<USAGE_END;
+USAGE: $0 [options] infile > outfile\n
+	--pid		# include PID with process names [1]
+	--tid		# include TID and PID with process names [1]
+	--inline	# un-inline using addr2line
+	--all		# all annotations (--kernel --jit)
+	--kernel	# annotate kernel functions with a _[k]
+	--jit		# annotate jit functions with a _[j]
+	--context	# adds source context to --inline
+	--srcline	# parses output of 'perf script -F+srcline' and adds source context
+	--addrs		# include raw addresses where symbols can't be found
+	--event-filter=EVENT	# event name filter\n
+[1] perf script must emit both PID and TIDs for these to work; eg, Linux < 4.1:
+	perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+    for Linux >= 4.1:
+	perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+    If you save this output add --header on Linux >= 3.14 to include perf info.
+USAGE_END
+
+if ($annotate_all) {	# --all is shorthand for --kernel --jit
+	$annotate_kernel = $annotate_jit = 1;
+}
+
+my %inlineCache;	# $pc => { $mod => result string of inline() }
+
+my %nmCache;	# $mod => captured output of nm for that module
+
+sub inlineCacheAdd {
+	# Remember $result as the inline() answer for address $pc in module
+	# $mod. Assigning through both keys autovivifies the inner hash, so
+	# a first entry for $pc needs no special case.
+	my ($addr, $module, $folded) = @_;
+
+	$inlineCache{$addr}{$module} = $folded;
+}
+
+# for the --inline option
+sub inline {	# memoised: ";"-joined inline call chain for $pc in $mod, via addr2line
+	my ($pc, $rawfunc, $mod) = @_;
+
+	return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod});
+
+	# Run a command and slurp its stdout. List-form open() bypasses the shell, so
+	# metacharacters in $pc/$mod (taken from the perf script input) stay inert.
+	my $capture = sub {
+		open(my $fh, "-|", @_) or return "";
+		my $out = do { local $/; <$fh> };
+		close $fh;
+		return defined $out ? $out : "";
+	};
+
+	# capture addr2line output and remove its first line
+	my $a2l_output = $capture->("addr2line", "-a", $pc, "-e", $mod, "-i", "-f", "-s", "-C");
+	$a2l_output =~ s/^(.*\n){1}//;
+
+	if ($a2l_output =~ /\?\?\n\?\?:0/) {
+		# if addr2line fails and rawfunc is func+offset, then fall back to it
+		if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) {
+			my $func = $1;
+			my $addr = hex $2;
+			$nmCache{$mod} = $capture->("nm", $mod) unless defined $nmCache{$mod};
+			if ($nmCache{$mod} =~ /^([0-9a-f]+) . \Q$func\E$/m) {
+				my $base = hex $1;
+				my $newPc = sprintf "0x%x", $base+$addr;
+				my $result = inline($newPc, '', $mod);
+				inlineCacheAdd($pc, $mod, $result);
+				return $result;
+			}
+		}
+	}
+
+	# addr2line output is (function, location) line pairs; unshift reverses their order
+	my @fullfunc;
+	my $one_item = "";
+	for (split /^/, $a2l_output) {
+		chomp $_;
+		$_ =~ s/ \(discriminator \S+\)//;	# remove discriminator info if exists
+		if ($one_item eq "") {
+			$one_item = $_;
+		} else {
+			if ($show_context == 1) {
+				unshift @fullfunc, $one_item . ":$_";
+			} else {
+				unshift @fullfunc, $one_item;
+			}
+			$one_item = "";
+		}
+	}
+
+	my $result = join ";" , @fullfunc;
+	inlineCacheAdd($pc, $mod, $result);
+	return $result;
+}
+
+my @stack;	# frames of the sample being read, outermost first
+my $pname;	# label of the current sample; undef while a sample is being skipped
+my $m_pid;
+my $m_tid;
+
+#
+# Main loop
+#
+while (defined($_ = <>)) {
+
+	# find the name of the process launched by perf, by stepping backwards
+	# over the args to find the first non-option (no dash):
+	if (/^# cmdline/) {
+		my @args = split ' ', $_;
+		foreach my $arg (reverse @args) {
+			if ($arg !~ /^-/) {
+				$target_pname = $arg;
+				$target_pname =~ s:.*/::;  # strip pathname
+				last;
+			}
+		}
+	}
+
+	# skip remaining comments
+	next if m/^#/;
+	chomp;
+
+	# end of stack. save cached data.
+	if (m/^$/) {
+		# ignore filtered samples
+		next if not $pname;
+
+		if ($include_pname) {
+			if (defined $pname) {
+				unshift @stack, $pname;
+			} else {	# not reachable: an undefined $pname was skipped above
+				unshift @stack, "";
+			}
+		}
+		remember_stack(join(";", @stack), 1) if @stack;
+		undef @stack;
+		undef $pname;
+		next;
+	}
+
+	#
+	# event record start
+	#
+	if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) {
+		# default "perf script" output has TID but not PID
+		# eg, "java 25607 4794564.109216: cycles:"
+		# eg, "java 12688 [002] 6544038.708352: cpu-clock:"
+		# eg, "V8 WorkerThread 25607 4794564.109216: cycles:"
+		# eg, "java 24636/25607 [000] 4794564.109216: cycles:"
+		# eg, "java 12688/12764 6544038.708352: cpu-clock:"
+		# eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: cycles:"
+		# other combinations possible
+		my ($comm, $pid, $tid) = ($1, $2, $3);
+		if (not $tid) {
+			$tid = $pid;
+			$pid = "?";
+		}
+
+		if (/(\S+):\s*$/) {
+			my $event = $1;
+
+			if ($event_filter eq "") {
+				# By default only show events of the first encountered
+				# event type. Merging together different types, such as
+				# instructions and cycles, produces misleading results.
+				$event_filter = $event;
+				$event_defaulted = 1;
+			} elsif ($event ne $event_filter) {
+				if ($event_defaulted and $event_warning == 0) {
+					# only print this warning if necessary:
+					# when we defaulted and there was
+					# multiple event types.
+					print STDERR "Filtering for events of type: $event_filter\n";	# name the type that is kept
+					$event_warning = 1;
+				}
+				next;
+			}
+		}
+
+		($m_pid, $m_tid) = ($pid, $tid);
+
+		if ($include_tid) {
+			$pname = "$comm-$m_pid/$m_tid";
+		} elsif ($include_pid) {
+			$pname = "$comm-$m_pid";
+		} else {
+			$pname = "$comm";
+		}
+		$pname =~ tr/ /_/;
+
+	#
+	# stack line
+	#
+	} elsif (/^\s*(\w+)\s*(.+) \((\S*)\)/) {
+		# ignore filtered samples
+		next if not $pname;
+
+		my ($pc, $rawfunc, $mod) = ($1, $2, $3);
+
+		if ($show_inline == 1 && $mod !~ m/(perf-\d+\.map|kernel\.|\[[^\]]+\])/) {
+			my $inlineRes = inline($pc, $rawfunc, $mod);
+			# - empty result this happens e.g., when $mod does not exist or is a path to a compressed kernel module
+			#   if this happens, the user will see error message from addr2line written to stderr
+			# - if addr2line results in "??" , then it's much more sane to fall back than produce a '??' in graph
+			if($inlineRes ne "" and $inlineRes ne "??" and $inlineRes ne "??:??:0" ) {
+				unshift @stack, $inlineRes;
+				next;
+			}
+		}
+
+		# Linux 4.8 included symbol offsets in perf script output by default, eg:
+		# 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms])
+		# strip these off:
+		$rawfunc =~ s/\+0x[\da-f]+$//;
+
+		next if $rawfunc =~ /^\(/;		# skip process names
+
+		my $is_unknown=0;
+		my @inline;
+		for (split /\->/, $rawfunc) {
+			my $func = $_;
+
+			if ($func eq "[unknown]") {
+				if ($mod ne "[unknown]") { # use module name instead, if known
+					$func = $mod;
+					$func =~ s/.*\///;
+				} else {
+					$func = "unknown";
+					$is_unknown=1;
+				}
+
+				if ($include_addrs) {
+					$func = "\[$func \<$pc\>\]";
+				} else {
+					$func = "\[$func\]";
+				}
+			}
+
+			if ($tidy_generic) {
+				$func =~ s/;/:/g;
+				if ($func !~ m/\.\(.*\)\./) {
+					# This doesn't look like a Go method name (such as
+					# "net/http.(*Client).Do"), so everything after the first open
+					# paren (that is not part of an "(anonymous namespace)") is
+					# just noise.
+					$func =~ s/\((?!anonymous namespace\)).*//;
+				}
+				# now tidy this horrible thing:
+				# 13a80b608e0a RegExp:[&<>\"\'] (/tmp/perf-7539.map)
+				$func =~ tr/"\'//d;
+				# fall through to $tidy_java
+			}
+
+			if ($tidy_java and $pname eq "java") {
+				# along with $tidy_generic, converts the following:
+				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/ContextAction;)Ljava/lang/Object;
+				#	Lorg/mozilla/javascript/ContextFactory;.call(Lorg/mozilla/javascript/C
+				#	Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
+				# into:
+				#	org/mozilla/javascript/ContextFactory:.call
+				#	org/mozilla/javascript/ContextFactory:.call
+				#	org/mozilla/javascript/MemberBox:.init
+				$func =~ s/^L// if $func =~ m:/:;
+			}
+
+			#
+			# Annotations
+			#
+			# detect inlined from the @inline array
+			# detect kernel from the module name; eg, frames to parse include:
+			#          ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
+			#          8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux)
+			#          7d8 ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4])
+			# detect jit from the module name; eg:
+			#          7f722d142778 Ljava/io/PrintStream;::print (/tmp/perf-19982.map)
+			if (scalar(@inline) > 0) {
+				$func .= "_[i]";	# inlined
+			} elsif ($annotate_kernel == 1 && $mod =~ m/(^\[|vmlinux$)/ && $mod !~ /unknown/) {
+				$func .= "_[k]";	# kernel
+			} elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) {
+				$func .= "_[j]";	# jitted
+			}
+
+			#
+			# Source lines
+			#
+			#
+			# Sample outputs:
+			#   | a.out 35081 252436.005167:     667783 cycles:
+			#   |                   408ebb some_method_name+0x8b (/full/path/to/a.out)
+			#   |   uniform_int_dist.h:300
+			#   |                   4069f5 main+0x935 (/full/path/to/a.out)
+			#   |   file.cpp:137
+			#   |             7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so)
+			#   |   libc-2.33.so[27b25]
+			#
+			#   | a.out 35081 252435.738165:     306459 cycles:
+			#   |             7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6)
+			#   |   libkmod.so.2.3.6[6750]
+			#
+			#   | a.out 35081 252435.738373:     315813 cycles:
+			#   |             7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so)
+			#   |   libc-2.33.so[16351b]
+			#   |             7ffc71ee9580 [unknown] ([unknown])
+			#   |
+			#
+			#   | a.out 35081 252435.718940:     247984 cycles:
+			#   |         ffffffff814f9302 up_write+0x32 ([kernel.kallsyms])
+			#   |   [kernel.kallsyms][ffffffff814f9302]
+			if($srcline_in_input and not $is_unknown){
+				$_ = <>;	# consume the source-location line that follows this frame
+				chomp;
+				s/\[.*?\]//g;
+				s/^\s*//g;
+				s/\s*$//g;
+				$func.=':'.$_ unless $_ eq "";
+			}
+
+			push @inline, $func;
+		}
+
+		unshift @stack, @inline;
+	} else {
+		warn "Unrecognized line: $_";
+	}
+}
+
+foreach my $stack (sort keys %collapsed) {	# default sort is the same string-wise cmp ordering
+	print "$stack $collapsed{$stack}\n";
+}