From 02d86d3e5216e51661974d069e1a7ed12fc12bad Mon Sep 17 00:00:00 2001
From: Ludwig Fueracker <ludwig.fueracker@fau.de>
Date: Sun, 29 Jan 2017 20:46:05 +0100
Subject: [PATCH] fixed axis units, exclude broken commits from build graphs

---
 evaluate_data_with_stop.py | 68 +++++++++++++++++++++++++++-----------
 validate_hashes.py         | 56 ++++++++++++++++---------------
 2 files changed, 77 insertions(+), 47 deletions(-)

diff --git a/evaluate_data_with_stop.py b/evaluate_data_with_stop.py
index 622eecd..53a5e11 100755
--- a/evaluate_data_with_stop.py
+++ b/evaluate_data_with_stop.py
@@ -150,7 +150,7 @@ def write_to_csv(data, column_names, filename):
 
 
 def print_avg(data, name):
-    print 'avg %s: %f' % (name, sum(data)/float(len(data)))
+    print 'avg %s: %d' % (name, int(sum(data)/float(len(data))))
 
 ################################################################################
 
@@ -160,18 +160,18 @@ def plot_build_time_graph1(data):
     plot_build_time_composition_graph(data[0], data[1], data[2], data[3])
 
 
-def plot_build_time_composition_graph(parse_times, hash_times, compile_times, diff_to_build_time): # times in ms
+def plot_build_time_composition_graph(parse_times, hash_times, compile_times, diff_to_build_time): # times in ns
     fig, ax = plt.subplots()
-
+#[i/1e6 for i in parse_times],
     ax.stackplot(np.arange(1, len(parse_times)+1), # x axis
-                 [parse_times, hash_times, compile_times,
+                 [[i/1e6 for i in parse_times], [i/1e6 for i in hash_times],[i/1e6 for i in compile_times], # ns to ms
                 #diff_to_build_time
                 ], colors=[parse_color,hash_color,compile_color,
                  #   remain_color
                 ], edgecolor='none')
     plt.xlim(1,len(parse_times))
     plt.xlabel('commits')
-    plt.ylabel('time [s]')
+    plt.ylabel('time [ms]')
     ax.set_yscale('log')
     lgd = ax.legend([#mpatches.Patch(color=remain_color),
                      mpatches.Patch(color=compile_color),
@@ -182,16 +182,6 @@ def plot_build_time_composition_graph(parse_times, hash_times, compile_times, di
                     loc='center left', bbox_to_anchor=(1, 0.5))
     fig.savefig(abs_path(BUILD_TIME_FILENAME), bbox_extra_artists=(lgd,), bbox_inches='tight')
 
-    print "\n-----------------"
-    print "average total times per build:"
-    print_avg(parse_times, 'parse')
-    print_avg(hash_times, 'hash')
-    print_avg(compile_times, 'compile')
-    print_avg(diff_to_build_time, 'remainder')
-    print ""
-    print "average times if header/source file touched"
-    print "-----------------\n"
-
 
 
 ################################################################################
@@ -205,7 +195,12 @@ def make_graphs(full_record):
     diff_to_build_times = []
 
     parse_times_header_touched = []
+    hash_times_header_touched = []
+    compile_times_header_touched = []
+
     parse_times_source_touched = []
+    hash_times_source_touched = []
+    compile_times_source_touched = []
 
 
 #    freshBuildRecord = full_record[0]
@@ -231,26 +226,59 @@ def make_graphs(full_record):
             total_hash_duration += current_file_record[tr('hash-duration')]
             total_compile_duration += current_file_record[tr('compile-duration')]
      
+            if current_record[tr('filename')].endswith('.h'):
+                parse_times_header_touched.append(total_parse_duration)
+                hash_times_header_touched.append(total_hash_duration)
+                compile_times_header_touched.append(total_compile_duration)
+            elif current_record[tr('filename')].endswith('.c'):
+                parse_times_source_touched.append(total_parse_duration)
+                hash_times_source_touched.append(total_hash_duration)
+                compile_times_source_touched.append(total_compile_duration)
+            else:
+                print "unknown file extension: " + current_record[tr('filename')]
+
 #        if total_parse_duration == 0:# or (total_compile_duration/1e6) > 500000:
 #            continue
   
-        total_parse_times.append(total_parse_duration / 1e6) # nano to milli
-        total_hash_times.append(total_hash_duration / 1e6)
-        total_compile_times.append(total_compile_duration / 1e6)
+        total_parse_times.append(total_parse_duration)
+        total_hash_times.append(total_hash_duration)
+        total_compile_times.append(total_compile_duration)
         build_time = current_record[tr('build-time')]
-        total_build_times.append(build_time / 1e6)
-        diff_to_build_times.append((build_time - total_parse_duration - total_hash_duration - total_compile_duration) / 1e6)
+        total_build_times.append(build_time)
+        diff_to_build_times.append((build_time - total_parse_duration - total_hash_duration - total_compile_duration))
 
  
         print 'run_id %d, #files_changed: %d' % (run_id, files_changed)
 
+
+    print "\n---- Results ----"
+    print "avg total build times [ns]"
     print_avg(total_build_times, 'total')
+    print "-----------------"
+    print "average times if header file touched [ns]"
+    print_avg(parse_times_header_touched, 'parse')
+    print_avg(hash_times_header_touched, 'hash')
+    print_avg(compile_times_header_touched, 'compile')
+    print "-----------------"
+    print "average times if source file touched [ns]"
+    print_avg(parse_times_source_touched, 'parse')
+    print_avg(hash_times_source_touched, 'hash')
+    print_avg(compile_times_source_touched, 'compile')
+    print "-----------------"
+    print "average total times per build [ns]:"
+    print_avg(total_parse_times, 'parse')
+    print_avg(total_hash_times, 'hash')
+    print_avg(total_compile_times, 'compile')
+    print_avg(diff_to_build_times, 'remainder')
+    print "-----------------\n"
 
     # save data to csv files
     build_time_data = np.column_stack((total_parse_times, total_hash_times, total_compile_times, diff_to_build_times, total_build_times))
     write_to_csv(build_time_data, BUILD_TIME_DATA_HEADER, abs_path(BUILD_TIME_DATA_FILENAME))
 
     plot_build_time_composition_graph(total_parse_times, total_hash_times, total_compile_times, diff_to_build_times)
+
+
     
 ################################################################################
 """functions for reading data from the csv files to skip full record building"""
diff --git a/validate_hashes.py b/validate_hashes.py
index 5c23bac..370a7a4 100755
--- a/validate_hashes.py
+++ b/validate_hashes.py
@@ -119,7 +119,7 @@ def validate_records():
     print ""
     print "avg times:"
     for k,v in sum_of_times.items():
-        print "%s: %d" % (k, v/nr_of_records)
+        print "%s: %d ns" % (k, v/nr_of_records)
     print "-----------------\n"
 
     write_to_csv([ [k,len(v)] for k,v in ast_hashes_dict.items() ], ['filename', 'nr of different hashes'], abs_path('different_ast_hashes_per_file.csv'))
@@ -332,15 +332,15 @@ def plot_build_time_graph1(data):
 def plot_build_time_graph(measuredBuildTimes, realClangHashBuildTimes, optimalClangHashBuildTimes, optimalBuildTimes): # times in s
     fig, ax = plt.subplots()
 
-    ax.plot(measuredBuildTimes, label='measured build time')
-    ax.plot(realClangHashBuildTimes, label='real clang-hash build time')
-    ax.plot(optimalClangHashBuildTimes, label='optimal clang-hash build time')
-    ax.plot(optimalBuildTimes, label='optimal build time')
+    ax.plot([i/60 for i in measuredBuildTimes], label='measured build time')
+    ax.plot([i/60 for i in realClangHashBuildTimes], label='real clang-hash build time')
+    ax.plot([i/60 for i in optimalClangHashBuildTimes], label='optimal clang-hash build time')
+    ax.plot([i/60 for i in optimalBuildTimes], label='optimal build time')
 
     lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend on the right
-
+    ax.set_ylim([0,5])
     plt.xlabel('commits')
-    plt.ylabel('time [ms]')
+    plt.ylabel('time [min]')
     fig.savefig(abs_path(BUILD_TIMES_GRAPH_FILENAME), bbox_extra_artists=(lgd,), bbox_inches='tight')
 
 
@@ -375,19 +375,19 @@ def plot_build_time_composition_graph(parseTimes, hashTimes, compileTimes, diffT
     print_avg(diffToBuildTime, 'remainder')
 
 
-def plotTimeHistogram(times, filename):
+def plotTimeHistogram(times, filename): # times in ms
     #TODO: understand params and vars
-    hist, bins = np.histogram(times, bins=50)
+    hist, bins = np.histogram([i/1000 for i in times], bins=50) # times to s
     width = 0.7 * (bins[1] - bins[0])
     center = (bins[:-1] + bins[1:]) / 2
     fig, ax = plt.subplots()
-    plt.xlabel('time [ms]')
+    plt.xlabel('time [s]')
     plt.ylabel('#files')
     ax.bar(center, hist, align='center', width=width)
     fig.savefig(filename)
 
 
-def plotTimeMultiHistogram(parseTimes, hashTimes, compileTimes, filename):
+def plotTimeMultiHistogram(parseTimes, hashTimes, compileTimes, filename): # times in ms
     bins = np.linspace(0, 5000, 50)
     data = np.vstack([parseTimes, hashTimes, compileTimes]).T
     fig, ax = plt.subplots()
@@ -398,9 +398,9 @@ def plotTimeMultiHistogram(parseTimes, hashTimes, compileTimes, filename):
     fig.savefig(filename)
 
     fig, ax = plt.subplots()
-    data = [parseTimes, hashTimes, compileTimes]
-    plt.boxplot(data, 0, 'rs', 0, [5, 95])
-    plt.xlabel('time [ms]')
+    boxplot_data = [[i/1000 for i in parseTimes], [i/1000 for i in hashTimes], [i/1000 for i in compileTimes]] # times to s
+    plt.boxplot(boxplot_data, 0, 'rs', 0, [5, 95])
+    plt.xlabel('time [s]')
     plt.yticks([1, 2, 3], ['parsing', 'hashing', 'compiling'])
     #lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5)) # legend on the right
     fig.savefig(filename[:-4] + '_boxplots' + GRAPH_EXTENSION)
@@ -570,22 +570,24 @@ def make_graphs(full_record):
         optimalAstHashBuildTime = buildTime - totalOptimalRedundantCompileTime
 
 
-        measuredBuildTimes.append(buildTime / 1e9) # nano to seconds
-        optimalBuildTimes.append(optimalBuildTime / 1e9)
-        optimalClangHashBuildTimes.append(optimalAstHashBuildTime / 1e9)
-        realClangHashBuildTimes.append(realAstHashBuildTime / 1e9)
+        #TODO: remove broken commits; ok?
+        if buildTime > 3e12 and totalParseDuration/1e9 > 300:
+            measuredBuildTimes.append(buildTime / 16e9) # nano to seconds; also /16 to account for make -j16
+            optimalBuildTimes.append(optimalBuildTime / 16e9)
+            optimalClangHashBuildTimes.append(optimalAstHashBuildTime / 16e9)
+            realClangHashBuildTimes.append(realAstHashBuildTime / 16e9)
 
-        totalParseTimes.append(totalParseDuration / 1e9) # nano to seconds
-        totalHashTimes.append(totalHashDuration / 1e9)
-        totalCompileTimes.append(totalCompileDuration / 1e9)
-        diffToBuildTime.append((buildTime - totalParseDuration - totalHashDuration - totalCompileDuration) / 1e9)
+            totalParseTimes.append(totalParseDuration / 16e9) # nano to seconds
+            totalHashTimes.append(totalHashDuration / 16e9)
+            totalCompileTimes.append(totalCompileDuration / 16e9)
+            diffToBuildTime.append((buildTime - totalParseDuration - totalHashDuration - totalCompileDuration) / 16e9)
 
 
-        # changes graph
-        differentAstHashes.append(differentAstHash)
-        differentObjHashes.append(differentObjHash)
-        sameHashes.append(same)
-        fileCounts.append(fileCount)
+            # changes graph
+            differentAstHashes.append(differentAstHash)
+            differentObjHashes.append(differentObjHash)
+            sameHashes.append(same)
+            fileCounts.append(fileCount)
 
         prevCommit = currentCommit
         prevCommitID = commitID
-- 
GitLab