scripts: updates to plotting scripts

e0588f38 · Andreas Ziegler · c5e73fda · e0588f38 · e0588f38 · e0588f38
Commit e0588f38 authored 3 years ago by Andreas Ziegler
--- a/scripts/compare_section_sizes.py
+++ b/scripts/compare_section_sizes.py
+#!/usr/bin/python3
+
+import os
+import sys
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib
+
+from elftools.elf.elffile import ELFFile
+
+# Both ELF files are required. Show the usage text and stop when one of them
+# is missing instead of failing with an IndexError on sys.argv below.
+if len(sys.argv) < 3:
+    sys.exit('Usage: {} orig_file shrunk_file'.format(sys.argv[0]))
+
+# The file objects are kept open because the ELFFile objects created from
+# them are still used further down in the script.
+fd_orig = open(sys.argv[1], 'rb')
+elf_orig = ELFFile(fd_orig)
+fd_shrunk = open(sys.argv[2], 'rb')
+elf_shrunk = ELFFile(fd_shrunk)
+# The plot is named after the original file and written without a directory
+# component, i.e. relative to the current working directory.
+outfile = os.path.basename(sys.argv[1]) + '_section_sizes.pdf'
+
+# Collect one record per non-empty section of the original file, paired with
+# the size of the section of the same name in the shrunk file.
+lst = []
+for idx, sec_orig in enumerate(elf_orig.iter_sections()):
+    bytes_before = sec_orig['sh_size']
+    # Empty sections are skipped; this also keeps the division below safe.
+    if bytes_before == 0:
+        continue
+    sec_shrunk = elf_shrunk.get_section_by_name(sec_orig.name)
+    if not sec_shrunk:
+        # NOTE(review): this ends the whole loop at the first section that is
+        # missing from the shrunk file, so later sections are not reported
+        # either - confirm that this is intended.
+        break
+    bytes_after = sec_shrunk['sh_size']
+    # Relative size reduction in percent of the original section size
+    reduction = ((bytes_before - bytes_after) / bytes_before) * 100
+    print('[{}] {}: {} -> {} (-{:.3f}%)'.format(idx, sec_orig.name,
+                                         bytes_before, bytes_after,
+                                         reduction))
+    record = {'section': sec_orig.name,
+              'old size': bytes_before,
+              'new size': bytes_after}
+    lst.append(record)
+
+# Count how many entries at the end of the list have the same size in both
+# files (walk backwards until the first entry whose sizes differ).
+last_n_equal = 0
+for d in reversed(lst):
+    if d['new size'] != d['old size']:
+        break
+    last_n_equal += 1
+# NOTE(review): this reset discards the count computed by the loop above, so
+# no trailing sections are ever cut and the message below always reports 0.
+# Confirm whether the trimming was disabled on purpose.
+last_n_equal = 0
+
+print('cutting {} last sections with matching sizes'.format(last_n_equal))
+# Build the table that is plotted: all records except the last_n_equal ones
+df = pd.DataFrame(lst[:len(lst) - last_n_equal], columns=['section', 'old size', 'new size'])
+print(df)
+
+def thin_space_thousands(value, pos):
+    # Tick formatter: integer value with thin spaces as thousands separators
+    return '{:,}'.format(int(value)).replace(",", u"\N{thin space}")
+
+
+figure = plt.figure()
+axes = figure.add_subplot()
+# Extend the x axis 15% beyond the largest original section size
+# (presumably to leave room for the value labels next to the bars).
+x_upper = round(df['old size'].max() * 1.15)
+# One horizontal bar pair (old size / new size) per section
+plot_axes = df.plot.barh(x='section',
+                         y=['old size', 'new size'],
+                         figsize=(7,10),
+                         xlim=(0,x_upper),
+                         alpha=.7,
+                         width=.8,
+                         ax=axes,
+                         legend=True)
+
+bar_groups, _ = plot_axes.get_legend_handles_labels()
+plot_axes.set_xlabel('Size of section in bytes')
+plot_axes.set_ylabel('Name of ELF section')
+# Annotate the bars of both series with their integer value
+for series_idx in (0, 1):
+    plot_axes.bar_label(bar_groups[series_idx], padding=10, fmt='%d')
+
+# Flip the y axis so the first row of the table ends up at the top
+axes.invert_yaxis()
+# Don't use scientific notation on the x axis
+plot_axes.ticklabel_format(style='plain', axis='x')
+plot_axes.get_xaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(thin_space_thousands))
+# Write the plot out
+plt.tight_layout()
+plt.savefig(outfile)
--- a/scripts/copy_original_files.py
+++ b/scripts/copy_original_files.py
+#!/usr/bin/python3
+import sys
+import os
+import shutil
+
+from librarytrader.librarystore import LibraryStore
+
+# Command line: <library store file> <output directory>
+s = LibraryStore()
+s.load(sys.argv[1])
+outpath = sys.argv[2]
+
+for key, value in s.items():
+    # Recreate the path `key` (with its leading '/' removed) below outpath
+    full_outpath = os.path.join(outpath, key.lstrip('/'))
+    link_dir = os.path.dirname(full_outpath)
+    os.makedirs(link_dir, exist_ok=True)
+    if isinstance(value, str):
+        # String values are recreated as relative symlinks (presumably the
+        # store maps a symlink's path to the path it points to - confirm).
+        # The target is computed relative to the directory holding the link,
+        # so it stays inside the output tree for any outpath. Counting the
+        # '/' characters of full_outpath only did that when outpath was a
+        # single relative path component.
+        target_in_tree = os.path.join(outpath, value.lstrip('/'))
+        link_target = os.path.relpath(target_in_tree, start=link_dir)
+        os.symlink(link_target, full_outpath)
+    else:
+        # Everything else is copied from its original location; a symlink at
+        # `key` is copied as a symlink rather than followed.
+        shutil.copy(key, full_outpath, follow_symlinks=False)
--- a/scripts/pandas_to_dataref.py
+++ b/scripts/pandas_to_dataref.py
+#!/usr/bin/python3
+
+import os
+import sys
+import re
+import pandas as pd
+
+from versuchung.tex import DatarefDict
+
+# Command line: <input csv> <dataref output> <index column> [<library store>]
+infile = sys.argv[1]
+outfile = sys.argv[2]
+index = sys.argv[3]
+# The library store argument is optional
+store_path = sys.argv[4] if len(sys.argv) > 4 else None
+
+csv = pd.read_csv(infile)
+# Reduce the filename column to the file's base name
+csv['filename'] = csv['filename'].apply(os.path.basename)
+# Variant of the filename with every underscore prefixed by a backslash
+csv['latex filename'] = csv['filename'].apply(lambda name: re.sub('_', '\\_', name))
+# Per-row function counts: exported plus local functions
+csv['functions before'] = csv['exported functions before'] + csv['local functions before']
+csv['functions after'] = csv['exported functions after'] + csv['local functions after']
+
+csv = csv.set_index(index)
+print(csv)
+# Export the whole table into the dataref file
+dref = DatarefDict(outfile)
+dref.pandas(csv, verbose=True)
+
+# Totals for ELF csv files (from ELFRemove): one 'total/<column>' entry per
+# summed column.
+for column in ('filesize before', 'filesize after',
+               'code size before', 'code size after',
+               'local functions before', 'local functions after',
+               'exported functions before', 'exported functions after'):
+    dref['total/' + column] = csv[column].sum()
+
+# Overall function counts: the column sums of exported and local functions
+# are added together.
+for when in ('before', 'after'):
+    exported_total = csv['exported functions ' + when].sum()
+    local_total = csv['local functions ' + when].sum()
+    dref['total/functions ' + when] = exported_total + local_total
+
+# Number of rows in the table
+dref['total/number of libraries'] = len(csv)
+# The following are for the kernel CSV files
+#dref['total/number of files original'] = csv['number of files original'].sum()
+#dref['total/number of files tailored'] = csv['number of files tailored'].sum()
+#dref['total/number of features original'] = csv['number of features original'].sum()
+#dref['total/number of features tailored'] = csv['number of features tailored'].sum()
+
+
+if store_path:
+    from librarytrader.librarystore import LibraryStore
+    store = LibraryStore()
+    store.load(store_path)
+    # An object counts as a binary when its full name does not contain '.so'
+    # and its base name does not start with 'lib'.
+    dref['total/number of binaries'] = sum(
+        1 for obj in store.get_library_objects()
+        if '.so' not in obj.fullname
+        and not os.path.basename(obj.fullname).startswith('lib'))
+
+# Write the collected values out
+dref.flush()
--- a/scripts/plot_codesize.py
+++ b/scripts/plot_codesize.py
+#!/usr/bin/python3
+
+import matplotlib
 import matplotlib.pyplot as plt
 import pandas as pd
 import os
@@ -62,6 +65,7 @@ ymax = df_s['code size before'].max() * 1.25
 ax.set_ylim((0, ymax))
 ax.bar_label(handles[0], padding=10, fmt='%d', rotation='vertical')
 ax.bar_label(handles[1], padding=10, fmt='%d', rotation='vertical')
+ax.get_yaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: '{:,}'.format(int(x)).replace(",", u"\N{thin space}")))

 # Write the plot out
 plt.tight_layout()

--- a/scripts/plot_filesize.py
+++ b/scripts/plot_filesize.py
+#!/usr/bin/python3
+
+import matplotlib
+import matplotlib.pyplot as plt
+import pandas as pd
+import os
+import seaborn
+import sys
+
+from matplotlib import gridspec, rcParams
+
+def change_width(ax, new_value):
+    """Set every patch of *ax* to width *new_value*, keeping it centered."""
+    for bar in ax.patches:
+        # Moving the left edge by half of the width change keeps the center
+        # of the bar where it was.
+        shift = (bar.get_width() - new_value) * .5
+        bar.set_width(new_value)
+        bar.set_x(bar.get_x() + shift)
+
+# Optional positional arguments override the default input CSV (first) and
+# the default output image (second).
+cli_args = sys.argv[1:]
+infile = cli_args[0] if len(cli_args) > 0 else 'tailored_libs_clang.json/stats.csv'
+outfile = cli_args[1] if len(cli_args) > 1 else 'bla.svg'
+
+# Default figure size for the plot created below
+rcParams.update({'figure.figsize': (9,6)})
+
+# Read and enhance datafile
+df = pd.read_csv(infile)
+# NOTE(review): f_after / f_before are not read again in the visible part of
+# this script - confirm whether they are still needed.
+df['f_after'] = df['exported functions after'] + df['local functions after']
+df['f_before'] = df['exported functions before'] + df['local functions before']
+# Reduce the filename column to the file's base name
+df['filename'] = df['filename'].apply(lambda x: os.path.basename(x))
+
+# Sort dataframe by code size, largest first.
+# NOTE(review): the plot shows file sizes but is ordered by code size,
+# presumably to keep the same library order as the code size plot - confirm.
+df_s = df.sort_values(by='code size before', ascending=False)
+
+# seaborn: reshape to long format (one 'variable'/'value' row per filename
+# and size column) so the two sizes can be grouped via hue
+data = df_s[['filename', 'filesize before', 'filesize after']].melt('filename')
+ax = seaborn.barplot(x='filename',
+                     y='value',
+                     hue='variable',
+                     data=data)
+# Don't use scientific notation on the y axis
+ax.ticklabel_format(style='plain', axis='y')
+# Rotate the filename labels by 45 degrees, right-aligned
+ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
+
+# Give all bars a fixed width of .30
+change_width(ax, .30)
+
+handles, labels = ax.get_legend_handles_labels()
+# Switching the order of data labels in the legend is currently disabled
+# (the two commented-out lines below); the handles are used in their
+# original order with replacement legend texts.
+#order = [1,0]
+#plt.legend([handles[idx] for idx in order],
+plt.legend(handles,
+        ['Size of original library file', 'Size of tailored library file'],
+        loc='upper right',
+#        fontsize='large',
+        bbox_to_anchor=(.95,.9))
+
+ax.set_xlabel('Library filename')
+ax.set_ylabel('File size in bytes')
+# Extend the y axis 25% beyond the largest original file size (presumably to
+# leave room for the vertical value labels above the bars)
+ymax = df_s['filesize before'].max() * 1.25
+ax.set_ylim((0, ymax))
+# Annotate the bars of both series with their integer value
+ax.bar_label(handles[0], padding=10, fmt='%d', rotation='vertical')
+ax.bar_label(handles[1], padding=10, fmt='%d', rotation='vertical')
+
+# Format the y tick labels as integers with thin spaces as thousands separators
+ax.get_yaxis().set_major_formatter(matplotlib.ticker.FuncFormatter(lambda x, p: '{:,}'.format(int(x)).replace(",", u"\N{thin space}")))
+# Write the plot out
+plt.tight_layout()
+plt.savefig(outfile)
--- a/scripts/plot_runtimes.py
+++ b/scripts/plot_runtimes.py
+#!/usr/bin/python3
+
 import matplotlib
 import matplotlib.pyplot as plt
 import pandas as pd