analyze_results.py 6.74 KB
Newer Older
1
2
3
#!/usr/bin/env python2

import os
4
import re
5
6
7
import sys
import logging
import time
8
9
from itertools import groupby
from collections import defaultdict
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

# Make a versuchung checkout below the user's home directory importable when
# it exists.  The path is appended, so anything found earlier on sys.path
# still wins.  expanduser("~") is used instead of os.environ["HOME"] so that
# an unset HOME variable does not abort the script with a KeyError before the
# versuchung imports below are even attempted.
tmp_path = "%s/git/versuchung/src" % os.path.expanduser("~")
if os.path.exists(tmp_path):
    sys.path.append(tmp_path)

from versuchung.experiment import Experiment
from versuchung.types import String, Bool,Integer,List
from versuchung.files import File, Directory
from versuchung.execute import shell
from versuchung.tex import DatarefDict

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from incremental_rebuild import IncrementalCompilation
26
27
from historical_build  import HistoricalCompilation

28

29
class AnalyzeResults(Experiment):
    """Condense incremental and historical build experiments into dref keys.

    Every value is written through :meth:`save` into the ``tex`` output
    (``data.dref``).  A few diagnostic lines (best file / best commit, hit
    count mismatches) are additionally printed to stdout.
    """

    inputs = {
        'incremental': List(IncrementalCompilation(), []),
        'historical': List(HistoricalCompilation(), []),
    }
    outputs = {'tex': DatarefDict('data.dref')}

    def save(self, path, value):
        """Store ``value`` under the '/'-joined ``path`` and log the pair.

        ``path`` is a list of key components, e.g. ``[variant, "rebuild",
        "avg"]``.
        """
        name = '/'.join(path)
        self.tex[name] = value
        logging.info("%s = %s", name, value)

    def _save_series(self, key, values, with_sum=False):
        """Save count and average (optionally the sum first) of ``values``.

        ``key`` is the list of leading path components.  The average is
        computed with ``np.average`` exactly as before, so an empty list
        still yields ``nan`` rather than being skipped; the keys are always
        written.
        """
        if with_sum:
            self.save(key + ["sum"], sum(values))
        self.save(key + ["count"], len(values))
        self.save(key + ["avg"], np.average(values))

    def run(self):
        """Analyze the incremental results first, then the historical ones."""
        # NOTE(review): assigned but never read inside this class; kept
        # because other code may rely on the attribute existing.
        self.project_name = ""
        self._analyze_incremental()
        self._analyze_historical()

    def _analyze_incremental(self):
        """Save fresh-build and rebuild statistics per variant.

        For every project the file with the smallest
        touch-clang-hash / touch-normal time ratio is printed.
        """
        by_project = sorted(self.incremental, key=lambda r: r.project_name())
        for project, results in groupby(by_project,
                                        key=lambda r: r.project_name()):
            # times[filename][method] -> build time in seconds
            times = defaultdict(dict)

            for result in sorted(results, key=lambda r: r.variant_name()):
                variant = result.variant_name()
                # NOTE(review): eval() executes whatever the stats file
                # contains.  Acceptable only while the stats are produced by
                # our own experiments; consider ast.literal_eval if the
                # recorded data is guaranteed to be a plain literal.
                records = eval(result.stats.value)
                rebuild_all = []
                rebuild_headers = []
                rebuild_sources = []

                for build in records['builds']:
                    # Get a float in seconds
                    seconds = build['build-time'] / 1e9

                    # Strip everything up to and including ".../project/".
                    # The FRESH_BUILD pseudo file is recorded here as well,
                    # so it takes part in the best-file ranking below.
                    filename = re.sub(".*?/project/", "", build['filename'])
                    times[filename][result.method_name()] = seconds

                    if build['filename'] == "FRESH_BUILD":
                        self.save([variant, "fresh build"], seconds)
                        continue

                    rebuild_all.append(seconds)
                    if build['filename'].endswith('.h'):
                        rebuild_headers.append(seconds)
                    else:
                        rebuild_sources.append(seconds)

                self._save_series([variant, 'rebuild'], rebuild_all)
                self._save_series([variant, 'rebuild', "sources"],
                                  rebuild_sources)
                self._save_series([variant, 'rebuild', "headers"],
                                  rebuild_headers)

            def ratio(name):
                return (times[name]['touch-clang-hash']
                        / times[name]['touch-normal'])

            # Best effort, like the historical best-commit analysis: a
            # missing method, an empty result set or a zero baseline must
            # not abort the whole analysis.
            try:
                best = min(times, key=ratio)
                # One pre-formatted string prints identically on Python 2
                # and Python 3.
                print("%s %s %s %s" % (project, best, ratio(best),
                                       times[best]))
            except (KeyError, ValueError, ZeroDivisionError) as error:
                logging.warning("%s: no best incremental file (%r)",
                                project, error)

    def _analyze_historical(self):
        """Save historical build times and cache hit/miss statistics.

        Saves per-variant values, per-mode aggregates over all variants and,
        per project, the commit with the smallest clang-hash / normal time
        ratio.  Finally prints every commit whose total hit count differs
        between the 'clang-hash' and 'ccache-clang-hash' modes.
        """
        by_project = sorted(self.historical, key=lambda r: r.project_name())
        # method_stats[mode][counter] -> sum over all variants
        method_stats = defaultdict(lambda: defaultdict(int))
        # hits[commit][mode] -> (all hits, ccache hits, clang-hash hits)
        hits = defaultdict(dict)

        for project, results in groupby(by_project,
                                        key=lambda r: r.project_name()):
            # times[commit][mode] -> build time in seconds
            times = defaultdict(dict)

            for result in sorted(results, key=lambda r: r.variant_name()):
                key = [result.variant_name(), 'historical']
                mode = result.metadata['mode']
                # NOTE(review): eval() on recorded data, see the remark in
                # _analyze_incremental.
                records = eval(result.stats.value)

                # How many hits were produced by clang-hash/ccache
                stats = defaultdict(int)
                build_times = []
                failed = 0

                for build in records['builds']:
                    if build.get('failed'):
                        failed += 1
                        continue

                    seconds = build['build-time'] / 1e9
                    build_times.append(seconds)
                    times[build['commit']][mode] = seconds

                    ccache_hits = build.get('ccache-hits', 0)
                    ccache_misses = build.get('ccache-misses', 0)
                    hash_hits = build.get('clang-hash-hits', 0)
                    hash_misses = build.get('clang-hash-misses', 0)

                    stats['misses/clang-hash'] += hash_misses
                    stats['hits/clang-hash'] += hash_hits
                    stats['misses/ccache'] += ccache_misses
                    stats['hits/ccache'] += ccache_hits
                    stats['hits'] += ccache_hits + hash_hits
                    stats['misses'] += ccache_misses + hash_misses

                    # NOTE(review): in the combined mode all clang-hash
                    # lookups are taken out of the total miss count again,
                    # presumably because they are already counted as ccache
                    # misses -- confirm.
                    if mode == "ccache-clang-hash":
                        stats["misses"] -= hash_hits + hash_misses

                    hits[build['commit']][mode] = (ccache_hits + hash_hits,
                                                   ccache_hits, hash_hits)

                # Over all builds of an experiment
                self.save(key + ["failed"], failed)
                self._save_series(key, build_times, with_sum=True)

                for name in stats:
                    self.save(key + [name], stats[name])
                    # Aggregate hits and misses per method
                    method_stats[mode][name] += stats[name]

            def ratio(commit):
                return times[commit]['clang-hash'] / times[commit]['normal']

            # Best effort: if any commit lacks one of the required modes the
            # project gets no (or only partial) "best commit" keys.  Only
            # the errors that incomplete data causes are tolerated, and they
            # are logged instead of being dropped silently.
            try:
                best = min(times, key=ratio)
                best_ratio = ratio(best)
                print("%s %s %s %s" % (project, best, best_ratio,
                                       times[best]))

                self.save([project, "best commit", "hash"], best[0:10])
                for name in ("normal", "ccache", "clang-hash",
                             "ccache-clang-hash"):
                    self.save([project, "best commit", name],
                              times[best][name])
                self.save([project, "best commit", "ratio"], best_ratio)
            except (KeyError, ValueError, ZeroDivisionError) as error:
                logging.warning("%s: best commit incomplete (%r)",
                                project, error)

        # Output method statistics
        for method in method_stats:
            for name in method_stats[method]:
                self.save([method, "historical", name],
                          method_stats[method][name])

        # A commit that was not (successfully) built in one of the two modes
        # counts as zero hits there.  The previous int default made the [0]
        # subscript raise a TypeError for such commits.
        no_hits = (0, 0, 0)
        for commit in hits:
            plain = hits[commit].get('clang-hash', no_hits)[0]
            combined = hits[commit].get('ccache-clang-hash', no_hits)[0]
            if plain != combined:
                print("%s %s %s" % (commit, plain - combined, hits[commit]))

167
if __name__ == "__main__":
    # Script entry point: run the analysis with the command line the script
    # was started with, always followed by an extra "-s" argument.
    # NOTE(review): the meaning of "-s" is defined by versuchung's Experiment
    # command line handling, not in this file -- confirm there.
    cli_arguments = sys.argv + ["-s"]
    analysis = AnalyzeResults()
    # NOTE(review): the name suggests the call returns the result directory;
    # the value is not used any further.
    dirname = analysis(cli_arguments)