analyze_results.py 6.17 KB
Newer Older
1
2
3
#!/usr/bin/env python2

import os
4
import re
5
6
7
import sys
import logging
import time
8
9
from itertools import groupby
from collections import defaultdict
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

tmp_path = "%s/git/versuchung/src"% os.environ["HOME"]
if os.path.exists(tmp_path):
    sys.path.append(tmp_path)

from versuchung.experiment import Experiment
from versuchung.types import String, Bool,Integer,List
from versuchung.files import File, Directory
from versuchung.execute import shell
from versuchung.tex import DatarefDict

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from incremental_rebuild import IncrementalCompilation
26
27
from historical_build  import HistoricalCompilation

28

29
class AnalyzeResults(Experiment):
    """Condense build-time measurements into a dataref dictionary.

    The experiment reads the results of ``IncrementalCompilation`` and
    ``HistoricalCompilation`` runs and stores derived numbers (counts,
    averages, sums, cache hits/misses, best file/commit) in the ``tex``
    output through :meth:`save`.
    """

    inputs = {
        'incremental': List(IncrementalCompilation(), []),
        'historical': List(HistoricalCompilation(), []),
    }
    outputs = {'tex': DatarefDict('data.dref')}

    def save(self, path, value):
        """Store *value* in ``self.tex`` under the '/'-joined *path* and log it.

        path  -- list of strings forming the key components
        value -- the value to record
        """
        key = '/'.join(path)
        self.tex[key] = value
        logging.info("%s = %s", key, value)

    @staticmethod
    def _load_records(result):
        """Return the evaluated ``stats`` value of one experiment result."""
        # SECURITY: eval() executes whatever expression the stats file
        # contains.  Only feed this script result directories you trust.
        # NOTE(review): if the stats are always plain literals,
        # ast.literal_eval would be a safer drop-in -- confirm the format.
        return eval(result.stats.value)

    def _save_seq(self, key, values, with_sum=False):
        """Save ``count`` and ``avg`` of *values* below *key*.

        With *with_sum* set, ``sum`` is saved first as well.
        """
        # NOTE(review): values may be empty (e.g. no header file was
        # touched); np.average then has nothing to average -- confirm the
        # resulting value is acceptable in the dref file.
        if with_sum:
            self.save(key + ["sum"], sum(values))
        self.save(key + ["count"], len(values))
        self.save(key + ["avg"], np.average(values))

    def run(self):
        """Analyze the incremental results first, then the historical ones."""
        self.project_name = ""
        self._analyze_incremental()
        self._analyze_historical()

    def _analyze_incremental(self):
        """Save rebuild statistics per variant and print the best file per project."""
        by_project = sorted(self.incremental, key=lambda r: r.project_name())
        # groupby only merges adjacent items, hence the sort by the same key.
        for (project, results) in groupby(by_project,
                                          key=lambda r: r.project_name()):
            # filename -> {method name -> build time in seconds}
            times = defaultdict(dict)

            for result in sorted(results, key=lambda r: r.variant_name()):
                records = self._load_records(result)
                build_times_all = []
                build_times_headers = []
                build_times_sources = []

                for build in records['builds']:
                    # Get a float in seconds (raw value is divided by 1e9,
                    # i.e. presumably nanoseconds).
                    t = build['build-time'] / 1e9

                    # Strip everything up to and including "/project/".
                    fn = re.sub(".*?/project/", "", build['filename'])
                    # NOTE(review): the FRESH_BUILD entry is recorded here
                    # too and therefore takes part in the ranking below --
                    # confirm this is intended.
                    times[fn][result.method_name()] = t

                    if build['filename'] == "FRESH_BUILD":
                        self.save([result.variant_name(), "fresh build"], t)
                        continue

                    build_times_all.append(t)
                    if build['filename'].endswith('.h'):
                        build_times_headers.append(t)
                    else:
                        build_times_sources.append(t)

                variant = result.variant_name()
                self._save_seq([variant, 'rebuild'], build_times_all)
                self._save_seq([variant, 'rebuild', "sources"],
                               build_times_sources)
                self._save_seq([variant, 'rebuild', "headers"],
                               build_times_headers)

            def score(fn):
                # Smaller is better: clang-hash time relative to normal time.
                return times[fn]['touch-clang-hash'] / times[fn]['touch-normal']

            # Best effort, like the historical ranking: a file that was not
            # measured with both methods (or a project without builds) must
            # not abort the remaining analysis.
            try:
                ranked = sorted(times, key=score)
                best = ranked[0]
                print(project, best, score(best), times[best])
            except (KeyError, IndexError, ZeroDivisionError) as err:
                logging.warning("%s: cannot determine best incremental file (%r)",
                                project, err)

    def _analyze_historical(self):
        """Save build-time and cache statistics of the historical builds.

        Per variant: number of failed builds, sum/count/avg of the build
        times and the hit/miss counters.  Per project: the commit with the
        smallest clang-hash/normal ratio.  Finally the hit/miss counters
        aggregated per mode.
        """
        by_project = sorted(self.historical, key=lambda r: r.project_name())
        # mode -> {counter name -> value summed over all results of that mode}
        method_stats = defaultdict(lambda: defaultdict(int))

        for (project, results) in groupby(by_project,
                                          key=lambda r: r.project_name()):
            # commit -> {mode -> build time in seconds}
            times = defaultdict(dict)

            for result in sorted(results, key=lambda r: r.variant_name()):
                key = [result.variant_name(), 'historical']
                records = self._load_records(result)

                # How many hits were produced by clang-hash/ccache
                stats = defaultdict(int)
                build_times = []
                failed = 0

                for build in records['builds']:
                    if build.get('failed'):
                        failed += 1
                        continue

                    t = build['build-time'] / 1e9
                    build_times.append(t)
                    times[build['commit']][result.metadata['mode']] = t

                    clang_hash_hits = build.get('clang-hash-hits', 0)
                    clang_hash_misses = build.get('clang-hash-misses', 0)
                    ccache_hits = build.get('ccache-hits', 0)
                    ccache_misses = build.get('ccache-misses', 0)

                    stats['misses/clang-hash'] += clang_hash_misses
                    stats['hits/clang-hash'] += clang_hash_hits
                    stats['misses/ccache'] += ccache_misses
                    stats['hits/ccache'] += ccache_hits
                    stats['hits'] += ccache_hits + clang_hash_hits
                    stats['misses'] += ccache_misses + clang_hash_misses
                    # NOTE(review): presumably a clang-hash hit in the
                    # combined mode was counted as a ccache miss before, so
                    # it is taken out of the total misses -- confirm.
                    if result.mode.value == "ccache-clang-hash":
                        stats["misses"] -= clang_hash_hits

                # Over all builds of an experiment
                self.save(key + ["failed"], failed)
                self._save_seq(key, build_times, with_sum=True)

                for k in stats:
                    self.save(key + [k], stats[k])
                    # Aggregate hits and misses per method
                    method_stats[result.metadata["mode"]][k] += stats[k]

            def ratio(commit):
                # Smaller is better: clang-hash time relative to normal time.
                return times[commit]['clang-hash'] / times[commit]['normal']

            # Best effort: commits lacking a mode (e.g. after a failed
            # build) make the ranking impossible.  Report that instead of
            # hiding every error behind a bare except.
            try:
                ranked = sorted(times, key=ratio)
                best = ranked[0]  # ranked[-1] would be the worst commit
                print(project, best, ratio(best), times[best])

                self.save([project, "best commit", "hash"], best[0:10])
                for k in ("normal", "ccache", "clang-hash", "ccache-clang-hash"):
                    self.save([project, "best commit", k], times[best][k])
                self.save([project, "best commit", "ratio"], ratio(best))
            except (KeyError, IndexError, ZeroDivisionError) as err:
                logging.warning("%s: cannot determine best commit (%r)",
                                project, err)

        # Output method statistics
        for method in method_stats:
            for k in method_stats[method]:
                self.save([method, "historical", k], method_stats[method][k])
155
156

if __name__ == "__main__":
    # Script entry point: build the experiment, then run it with the
    # command line the user gave plus a trailing "-s".
    # NOTE(review): "-s" is presumably versuchung's symlink option --
    # confirm against the versuchung command-line documentation.
    experiment = AnalyzeResults()
    cli_args = sys.argv + ["-s"]
    dirname = experiment(cli_args)