diff --git a/scripts/evaluate.py b/scripts/evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..204e909e7b5525a98052d831ba13153ef30d3da9
--- /dev/null
+++ b/scripts/evaluate.py
@@ -0,0 +1,145 @@
+## This file is part of the execution-time evaluation for the qronos observer abstractions.
+## Copyright (C) 2022-2023  Tim Rheinfels  <tim.rheinfels@fau.de>
+## See https://gitlab.cs.fau.de/qronos-state-abstractions/execution-time
+##
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+###
+###  @file  evaluate.py
+###
+###  @brief  This script uses the data obtained from the STM32
+###          and runs the evaluation by computing statistics and
+###          the execution time bar chart.
+###
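+###  @note  Invocation (cf. the argument check below):
+###             evaluate.py <input_file> <result_dir>
+###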
+###  @author  Tim Rheinfels  <tim.rheinfels@fau.de>
+###
+
+import logging
+import matplotlib.pyplot as plot
+import os
+import seaborn as sb
+import sys
+import tabulate
+
+from benchmarks.blind_abstraction import BlindAbstraction
+from benchmarks.observer_abstraction import ObserverAbstraction
+from benchmarks.luenberger_observer import LuenbergerObserver
+from benchmarks.kalman_filter import KalmanFilter
+
+from data import load_json_data
+
+
+# ===== Configuration =====
+
+# Measurement sizes n_y to be plotted in the bar graph
+measurement_sizes = (1, 10, 20, 30, 40, 50, 60)
+
+# ===== Configuration End =====
+
+
+
+# Taken from
+#   https://gist.github.com/SeanSyue/8c8ff717681e9ecffc8e43a686e68fd9
+import re
+def sorted_alphanumeric(data):
+    """
+    Since os.listdir returns filenames in an arbitrary order,
+    this function is very handy for generating a well-ordered filenames list.
+    Credit: https://stackoverflow.com/questions/4813061/non-alphanumeric-list-order-from-os-listdir/48030307#48030307
+    """
+    convert = lambda text: int(text) if text.isdigit() else text.lower()
+    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
+    return sorted(data, key=alphanum_key)
+
+if len(sys.argv) != 3:
+    print('Usage: %s <input_file> <result_dir>' % sys.argv[0])
+    print('')
+    print('Parameters:')
+    print('  input_file: File to read the data from')
+    print('  result_dir: Directory to store the results in')
+    print('')
+    sys.exit(1)
+
+data = load_json_data(sys.argv[1])
+result_dir = sys.argv[2]
+
+# Load and process benchmark results
+benchmarks = [
+    BlindAbstraction(data),
+    ObserverAbstraction(data),
+    LuenbergerObserver(data),
+    KalmanFilter(data),
+]
+
+# Create result dirs
+os.makedirs(result_dir, exist_ok=True)
+os.makedirs(os.path.join(result_dir, 'statistics'), exist_ok=True)
+
+# Tabularise execution times, print, and store
+for benchmark in benchmarks:
+    statistics = benchmark.compute_statistics()
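+    # compute_statistics() maps each measurement size (string key) to its mean, std, cov, min and max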
+
+    headers = ('Benchmark \\ Execution Time / cycles', 'mean', 'std', 'cov', 'min', 'max')
+    rows = []
+    for key in sorted_alphanumeric(statistics.keys()):
+        entry = statistics[key]
+        rows.append((key, entry['mean'], entry['std'], entry['cov'], entry['min'], entry['max']))
+    rows.append(('', '', '', '', '', ''))
+    table = tabulate.tabulate(rows, headers)
+
+    # To stdout
+    print('Statistics for Benchmark "%s":' % benchmark.name)
+    print(table)
+    print()
+
+    # To file
+    with open(os.path.join(result_dir, 'statistics', '%s.txt' % benchmark.name), 'w') as f:
+        f.write(table)
+
+# Create bar graph figure comparing the execution times
+_, ax = plot.subplots(1, figsize=(10, 3.33))
+
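+# Assemble the samples in long form (one entry per measurement) so seaborn can group
+# the bars by measurement size (x) and benchmark (hue)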
+x = []
+y = []
+hue = []
+for benchmark in benchmarks:
+    statistics = benchmark.compute_statistics()
+    for n_y in measurement_sizes:
+        key = str(n_y)
+        if key not in statistics.keys():
+            logging.warning('Measurement size n_y=%d not contained in benchmark %s' % (n_y, benchmark.name))
+            continue
+        execution_times = benchmark.execution_times[key].flatten()
+        n = len(execution_times)
+        x.extend([n_y] * n)
+        y.extend(execution_times)
+        hue.extend([benchmark.name] * n)
+
+bars = sb.barplot(x=x, y=y, hue=hue, errorbar=None, linewidth=8, ax=ax, alpha=0.99)
+
+# Set hatches
+hatches = (None, '\\\\', 'xx', '//')
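+# One hatch per benchmark; this assumes seaborn draws the patches of each hue level
+# (benchmark) as one contiguous block of len(measurement_sizes) bars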
+for i, patch in enumerate(bars.patches):
+    patch.set_hatch(hatches[i // len(measurement_sizes)])
+
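+# Logarithmic scale keeps benchmarks with very different execution times readable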
+ax.set_yscale('log')
+
+ax.set_title('Execution Time Measurements on Cortex-M4')
+ax.set_xlabel(r'Measurement Dimension $n_y$')
+ax.set_ylabel(r'Execution Time in CPU Cycles')
+
+ax.grid()
+ax.legend(loc='upper left')
+
+# Save
+plot.savefig(os.path.join(result_dir, 'execution_times.pdf'), bbox_inches='tight')