From 02d86d3e5216e51661974d069e1a7ed12fc12bad Mon Sep 17 00:00:00 2001 From: Ludwig Fueracker <ludwig.fueracker@fau.de> Date: Sun, 29 Jan 2017 20:46:05 +0100 Subject: [PATCH] fixed axis units, exclude broken commits from build graphs --- evaluate_data_with_stop.py | 68 +++++++++++++++++++++++++++----------- validate_hashes.py | 56 ++++++++++++++++--------------- 2 files changed, 77 insertions(+), 47 deletions(-) diff --git a/evaluate_data_with_stop.py b/evaluate_data_with_stop.py index 622eecd..53a5e11 100755 --- a/evaluate_data_with_stop.py +++ b/evaluate_data_with_stop.py @@ -150,7 +150,7 @@ def write_to_csv(data, column_names, filename): def print_avg(data, name): - print 'avg %s: %f' % (name, sum(data)/float(len(data))) + print 'avg %s: %d' % (name, int(sum(data)/float(len(data)))) ################################################################################ @@ -160,18 +160,18 @@ def plot_build_time_graph1(data): plot_build_time_composition_graph(data[0], data[1], data[2], data[3]) -def plot_build_time_composition_graph(parse_times, hash_times, compile_times, diff_to_build_time): # times in ms +def plot_build_time_composition_graph(parse_times, hash_times, compile_times, diff_to_build_time): # times in ns fig, ax = plt.subplots() - +#[i/1e6 for i in parse_times], ax.stackplot(np.arange(1, len(parse_times)+1), # x axis - [parse_times, hash_times, compile_times, + [[i/1e6 for i in parse_times], [i/1e6 for i in hash_times],[i/1e6 for i in compile_times], # ns to ms #diff_to_build_time ], colors=[parse_color,hash_color,compile_color, # remain_color ], edgecolor='none') plt.xlim(1,len(parse_times)) plt.xlabel('commits') - plt.ylabel('time [s]') + plt.ylabel('time [ms]') ax.set_yscale('log') lgd = ax.legend([#mpatches.Patch(color=remain_color), mpatches.Patch(color=compile_color), @@ -182,16 +182,6 @@ def plot_build_time_composition_graph(parse_times, hash_times, compile_times, di loc='center left', bbox_to_anchor=(1, 0.5)) fig.savefig(abs_path(BUILD_TIME_FILENAME), bbox_extra_artists=(lgd,), bbox_inches='tight') - print "\n-----------------" - print "average total times per build:" - print_avg(parse_times, 'parse') - print_avg(hash_times, 'hash') - print_avg(compile_times, 'compile') - print_avg(diff_to_build_time, 'remainder') - print "" - print "average times if header/source file touched" - print "-----------------\n" - ################################################################################ @@ -205,7 +195,12 @@ def make_graphs(full_record): diff_to_build_times = [] parse_times_header_touched = [] + hash_times_header_touched = [] + compile_times_header_touched = [] + parse_times_source_touched = [] + hash_times_source_touched = [] + compile_times_source_touched = [] # freshBuildRecord = full_record[0] @@ -231,26 +226,59 @@ def make_graphs(full_record): total_hash_duration += current_file_record[tr('hash-duration')] total_compile_duration += current_file_record[tr('compile-duration')] + if current_record[tr('filename')].endswith('.h'): + parse_times_header_touched.append(total_parse_duration) + hash_times_header_touched.append(total_hash_duration) + compile_times_header_touched.append(total_compile_duration) + elif current_record[tr('filename')].endswith('.c'): + parse_times_source_touched.append(total_parse_duration) + hash_times_source_touched.append(total_hash_duration) + compile_times_source_touched.append(total_compile_duration) + else: + print "unknown file extension: " + filename + # if total_parse_duration == 0:# or (total_compile_duration/1e6) > 500000: # continue - total_parse_times.append(total_parse_duration / 1e6) # nano to milli - total_hash_times.append(total_hash_duration / 1e6) - total_compile_times.append(total_compile_duration / 1e6) + total_parse_times.append(total_parse_duration) + total_hash_times.append(total_hash_duration) + total_compile_times.append(total_compile_duration) build_time = current_record[tr('build-time')] - total_build_times.append(build_time / 1e6) - diff_to_build_times.append((build_time - total_parse_duration - total_hash_duration - total_compile_duration) / 1e6) + total_build_times.append(build_time) + diff_to_build_times.append((build_time - total_parse_duration - total_hash_duration - total_compile_duration)) print 'run_id %d, #files_changed: %d' % (run_id, files_changed) + + print "\n---- Results ----" + print "avg total build times [ns]" print_avg(total_build_times, 'total') + print "-----------------" + print "average times if header file touched [ns]" + print_avg(parse_times_header_touched, 'parse') + print_avg(hash_times_header_touched, 'hash') + print_avg(compile_times_header_touched, 'compile') + print "-----------------" + print "average times if header source touched [ns]" + print_avg(parse_times_source_touched, 'parse') + print_avg(hash_times_source_touched, 'hash') + print_avg(compile_times_source_touched, 'compile') + print "-----------------" + print "average total times per build [ns]:" + print_avg(total_parse_times, 'parse') + print_avg(total_hash_times, 'hash') + print_avg(total_compile_times, 'compile') + print_avg(diff_to_build_times, 'remainder') + print "-----------------\n" # save data to csv files build_time_data = np.column_stack((total_parse_times, total_hash_times, total_compile_times, diff_to_build_times, total_build_times)) write_to_csv(build_time_data, BUILD_TIME_DATA_HEADER, abs_path(BUILD_TIME_DATA_FILENAME)) plot_build_time_composition_graph(total_parse_times, total_hash_times, total_compile_times, diff_to_build_times) + + ################################################################################ """functions for reading data from the csv files to skip full record building""" diff --git a/validate_hashes.py b/validate_hashes.py index 5c23bac..370a7a4 100755 --- a/validate_hashes.py +++ b/validate_hashes.py @@ -119,7 +119,7 @@ def validate_records(): print "" print "avg times:" for k,v in sum_of_times.items(): - print "%s: %d" % (k, v/nr_of_records) + print "%s: %d ns" % (k, v/nr_of_records) print "-----------------\n" write_to_csv([ [k,len(v)] for k,v in ast_hashes_dict.items() ], ['filename', 'nr of different hashes'], abs_path('different_ast_hashes_per_file.csv')) @@ -332,15 +332,15 @@ def plot_build_time_graph1(data): def plot_build_time_graph(measuredBuildTimes, realClangHashBuildTimes, optimalClangHashBuildTimes, optimalBuildTimes): # times in s fig, ax = plt.subplots() - ax.plot(measuredBuildTimes, label='measured build time') - ax.plot(realClangHashBuildTimes, label='real clang-hash build time') - ax.plot(optimalClangHashBuildTimes, label='optimal clang-hash build time') - ax.plot(optimalBuildTimes, label='optimal build time') + ax.plot([i/60 for i in measuredBuildTimes], label='measured build time') + ax.plot([i/60 for i in realClangHashBuildTimes], label='real clang-hash build time') + ax.plot([i/60 for i in optimalClangHashBuildTimes], label='optimal clang-hash build time') + ax.plot([i/60 for i in optimalBuildTimes], label='optimal build time') lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend on the right - + ax.set_ylim([0,5]) plt.xlabel('commits') - plt.ylabel('time [ms]') + plt.ylabel('time [min]') fig.savefig(abs_path(BUILD_TIMES_GRAPH_FILENAME), bbox_extra_artists=(lgd,), bbox_inches='tight') @@ -375,19 +375,19 @@ def plot_build_time_composition_graph(parseTimes, hashTimes, compileTimes, diffT print_avg(diffToBuildTime, 'remainder') -def plotTimeHistogram(times, filename): +def plotTimeHistogram(times, filename): # times in ms #TODO: understand params and vars - hist, bins = np.histogram(times, bins=50) + hist, bins = np.histogram([i/1000 for i in times], bins=50) # times to s width = 0.7 * (bins[1] - bins[0]) center = (bins[:-1] + bins[1:]) / 2 fig, ax = plt.subplots() - plt.xlabel('time [ms]') + plt.xlabel('time [s]') plt.ylabel('#files') ax.bar(center, hist, align='center', width=width) fig.savefig(filename) -def plotTimeMultiHistogram(parseTimes, hashTimes, compileTimes, filename): +def plotTimeMultiHistogram(parseTimes, hashTimes, compileTimes, filename): # times in ms bins = np.linspace(0, 5000, 50) data = np.vstack([parseTimes, hashTimes, compileTimes]).T fig, ax = plt.subplots() @@ -398,9 +398,9 @@ def plotTimeMultiHistogram(parseTimes, hashTimes, compileTimes, filename): fig.savefig(filename) fig, ax = plt.subplots() - data = [parseTimes, hashTimes, compileTimes] - plt.boxplot(data, 0, 'rs', 0, [5, 95]) - plt.xlabel('time [ms]') + boxplot_data = [[i/1000 for i in parseTimes], [i/1000 for i in hashTimes], [i/1000 for i in compileTimes]] # times to s + plt.boxplot(boxplot_data, 0, 'rs', 0, [5, 95]) + plt.xlabel('time [s]') plt.yticks([1, 2, 3], ['parsing', 'hashing', 'compiling']) #lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend on the right fig.savefig(filename[:-4] + '_boxplots' + GRAPH_EXTENSION) @@ -570,22 +570,24 @@ def make_graphs(full_record): optimalAstHashBuildTime = buildTime - totalOptimalRedundantCompileTime - measuredBuildTimes.append(buildTime / 1e9) # nano to seconds - optimalBuildTimes.append(optimalBuildTime / 1e9) - optimalClangHashBuildTimes.append(optimalAstHashBuildTime / 1e9) - realClangHashBuildTimes.append(realAstHashBuildTime / 1e9) + #TODO: remove broken commits; ok? + if buildTime > 3e12 and totalParseDuration/1e9 > 300: + measuredBuildTimes.append(buildTime / 16e9) # nano to seconds; also /16 to account for make -j16 + optimalBuildTimes.append(optimalBuildTime / 16e9) + optimalClangHashBuildTimes.append(optimalAstHashBuildTime / 16e9) + realClangHashBuildTimes.append(realAstHashBuildTime / 16e9) - totalParseTimes.append(totalParseDuration / 1e9) # nano to seconds - totalHashTimes.append(totalHashDuration / 1e9) - totalCompileTimes.append(totalCompileDuration / 1e9) - diffToBuildTime.append((buildTime - totalParseDuration - totalHashDuration - totalCompileDuration) / 1e9) + totalParseTimes.append(totalParseDuration / 16e9) # nano to seconds + totalHashTimes.append(totalHashDuration / 16e9) + totalCompileTimes.append(totalCompileDuration / 16e9) + diffToBuildTime.append((buildTime - totalParseDuration - totalHashDuration - totalCompileDuration) / 16e9) - # changes graph - differentAstHashes.append(differentAstHash) - differentObjHashes.append(differentObjHash) - sameHashes.append(same) - fileCounts.append(fileCount) + # changes graph + differentAstHashes.append(differentAstHash) + differentObjHashes.append(differentObjHash) + sameHashes.append(same) + fileCounts.append(fileCount) prevCommit = currentCommit prevCommitID = commitID -- GitLab