From 2389c8e083ae3332722dcb8f262649eb88a9859b Mon Sep 17 00:00:00 2001
From: Ludwig Fueracker <ludwig.fueracker@fau.de>
Date: Tue, 2 Aug 2016 18:08:41 +0200
Subject: [PATCH] more timing outputs and added change counting -> contains bug

---
 validate_hashes.py | 167 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 120 insertions(+), 47 deletions(-)

diff --git a/validate_hashes.py b/validate_hashes.py
index a169426..3b4eec1 100755
--- a/validate_hashes.py
+++ b/validate_hashes.py
@@ -20,18 +20,26 @@ errorCount = 0
 astDifferObjSameCount = 0
 missingCount = 0    
 
-def validateHashes(records):
+def validateHashes(recordList):
     global errorCount, astDifferObjSameCount, missingCount
     #TODO: this method assumes that all records are from the same object file
-
-    iterRecords = iter(records)
+    recordList.reverse() # I think the records are currently sorted the wrong way round (newest first)
+    iterRecords = iter(recordList)
     prevRecord = next(iterRecords)
     filename = prevRecord['filename']
     if 'ast-hash' not in prevRecord.keys():
-        print "MISSING: no ast-hash in records for file " + filename
+        #print "MISSING: no ast-hash in records for file " + filename
         missingCount += 1
         return
+    
     for record in iterRecords:
+#TODO        if prevRecord['start-time'] > record['start-time']:
+#TODO            print "Error: wrong order of records"
+            #TODO!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        if 'ast-hash' not in record.keys() or 'object-hash' not in record.keys():
+            print "ERROR: stopping validating for file %s; no ast-hash available for commit %s" % (filename, record['commit-hash'])
+            break
+
         if prevRecord['ast-hash'] == record['ast-hash']:
             if prevRecord['object-hash'] != record['object-hash']:
                 printHashInfo("ast hashes same, object hashes differ", prevRecord, record)
@@ -41,7 +49,7 @@ def validateHashes(records):
                 printHashInfo("object hashes differ, ast hashes same", prevRecord, record)
                 errorCount += 1
         elif prevRecord['ast-hash'] != record['ast-hash']:
-            printHashInfo("ast hashes differ, object hashes same", prevRecord, record, False)
+            #printHashInfo("ast hashes differ, object hashes same", prevRecord, record, False)
             astDifferObjSameCount += 1
 
         prevRecord = record
@@ -57,13 +65,16 @@ def validateHashes(records):
 #
 ################################################################################
 
+pathToRecords = "" # gets set to command line parameter
 
-def buildFullRecord(pathToRecords, pathToBuildTimes):
+################################################################################
+
+def buildFullRecord():
     '''Builds a complete record from all the single hash records.
        The records are grouped by the commitIDs'''
     fullRecord = {}
 
-    with open(pathToBuildTimes, 'r') as buildTimesFile:
+    with open(pathToRecords + "/../buildTimes_musl.info", 'r') as buildTimesFile:
         buildTimes = eval(buildTimesFile.read())
         for commitID in buildTimes:
             fullRecord[commitID] = {}
@@ -73,7 +84,7 @@ def buildFullRecord(pathToRecords, pathToBuildTimes):
         #TODO: sort entries by time and store them sorted
         # have to sort commits somehow
         # => sort by time (start with oldest)
-
+        # atm already sorted, but not if parallelized
 
     for recordFilename in getListOfFiles(pathToRecords):
         for line in open(recordFilename):
@@ -93,46 +104,108 @@ def makeBuildTimeGraph(fullRecord):
     iterCommits = iter(fullRecord)
     prevCommit = fullRecord[next(iterCommits)]
 
-    f1 = open("/home/cip/2015/yb90ifym/clang-hash/build/muslHashes/times.csv", 'a')
-   
-    for commitID in iterCommits:
-        currentCommit = fullRecord[commitID]
-        totalOptimalRedundantCompileTime = 0 # ns
-        totalASTHashRedundantCompileTime = 0 # ns
-        currentFiles = currentCommit['files']
-        prevFiles = prevCommit['files']
-        for filename in currentFiles:
-            if 'ast-hash' not in currentFiles[filename].keys():
-                #TODO: counter?
-                break
-       
-            currentRecord = currentFiles[filename]
-            prevRecord = prevFiles[filename]
-           
-            if prevRecord['object-hash'] == currentRecord['object-hash']:
-                totalOptimalRedundantCompileTime += currentRecord['compile-duration']
-            if prevRecord['ast-hash'] == currentRecord['ast-hash']:
-                totalASTHashRedundantCompileTime += currentRecord['compile-duration']
-
-        buildTime = currentCommit['build-time'] # ns
-        optimalBuildTime = buildTime - totalOptimalRedundantCompileTime # = buildTime - sum(compileTime(file) if objhash(file) unchanged)
-        astHashBuildTime = buildTime - totalASTHashRedundantCompileTime # = buildTime - sum(compileTime(file) if asthash(file) unchanged)
-
-        f1.write("%s;%s;%s;%s\n" % (commitNr, buildTime, optimalBuildTime, astHashBuildTime))
-
-        commitNr += 1
-        prevCommit = currentCommit        
-
-
-    f1.close()
+    with open(pathToRecords + "/../times.csv", 'w') as f_times:
+        f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % ("commitNr", "buildTime", "optimalBuildTime", "astHashBuildTime", "compileTimeOnly", "withoutCompileTime", "totalParsingTime", "totalHashingTime"))
+ 
+        for commitID in iterCommits:
+            currentCommit = fullRecord[commitID]
+            totalOptimalRedundantCompileTime = 0 # ns
+            totalASTHashRedundantCompileTime = 0 # ns
+            currentFiles = currentCommit['files']
+            prevFiles = prevCommit['files']
+            compileTimeOnly = 0 # ns
+            totalParsingTime = 0 # ns
+        
+            for filename in currentFiles:
+                if 'ast-hash' not in currentFiles[filename].keys():
+                    #TODO: counter?
+                    break
+                currentRecord = currentFiles[filename]
+                prevRecord = prevFiles[filename]
+            
+                compileTimeOnly += currentRecord['compile-duration'] # ns
+                totalParsingTime += currentRecord['parse-duration'] # ns
+
+                if prevRecord['object-hash'] == currentRecord['object-hash']:
+                    totalOptimalRedundantCompileTime += currentRecord['compile-duration'] # ns
+                if prevRecord['ast-hash'] == currentRecord['ast-hash']:
+                    totalASTHashRedundantCompileTime += currentRecord['compile-duration'] # ns
+
+            buildTime = currentCommit['build-time'] # ns
+            optimalBuildTime = buildTime - totalOptimalRedundantCompileTime # = buildTime - sum(compileTime(file) if objhash(file) unchanged)
+            astHashBuildTime = buildTime - totalASTHashRedundantCompileTime # = buildTime - sum(compileTime(file) if asthash(file) unchanged)
+
+            f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % (commitNr, buildTime, optimalBuildTime, astHashBuildTime, compileTimeOnly, buildTime - compileTimeOnly, totalParsingTime, buildTime - compileTimeOnly - totalParsingTime))
+
+            commitNr += 1
+            prevCommit = currentCommit        
+
+
+################################################################################
+
+def makeChangesGraph(fullRecord):
+    '''Writes per-commit hash change counts to changes.csv next to pathToRecords.
+
+       Every commit in fullRecord (after the first one) is compared file by
+       file against the commit iterated just before it. One CSV row
+       "commitNr;differentAstHash;differentObjHash;same" is written per commit.
+
+       fullRecord -- dict mapping a commit ID to a dict whose 'files' entry maps
+                     filenames to records with 'ast-hash' and 'object-hash'
+
+       Files whose hashes are missing in either commit (e.g. the file is new
+       in this commit) are skipped and appear in none of the counters.'''
+    hashKeys = ('ast-hash', 'object-hash')
+    iterCommits = iter(fullRecord)
+    firstCommitID = next(iterCommits, None) # None if there are no commits at all
+    prevFiles = fullRecord[firstCommitID]['files'] if firstCommitID is not None else {}
+
+    with open(pathToRecords + "/../changes.csv", 'w') as f_changes:
+        f_changes.write("%s;%s;%s;%s\n" % ("commitNr", "differentAstHash", "differentObjHash", "same"))
+
+        # NOTE: commits are compared in the iteration order of fullRecord; this
+        # is only meaningful if that order is chronological -- TODO confirm.
+        for commitNr, commitID in enumerate(iterCommits):
+            currentFiles = fullRecord[commitID]['files']
+            same = 0
+            differentAstHash = 0
+            differentObjHash = 0
+
+            for filename in currentFiles:
+                currentRecord = currentFiles[filename]
+                if 'ast-hash' not in currentRecord:
+                    # Skip only this file. Stopping the whole loop here would
+                    # drop an arbitrary set of the remaining files, because
+                    # dicts are iterated in no particular order.
+                    print("ast-hash not in keys of file " + filename)
+                    continue
+
+                # A file without a usable predecessor record cannot be compared.
+                prevRecord = prevFiles.get(filename, {})
+                if not all(k in currentRecord and k in prevRecord for k in hashKeys):
+                    continue
+
+                if prevRecord['object-hash'] != currentRecord['object-hash']:
+                    # A changed object file is counted as an AST change too.
+                    differentObjHash += 1
+                    differentAstHash += 1
+                elif prevRecord['ast-hash'] != currentRecord['ast-hash']:
+                    differentAstHash += 1
+                else:
+                    same += 1
+
+            f_changes.write("%s;%s;%s;%s\n" % (commitNr, differentAstHash, differentObjHash, same))
+            #TODO: store the result as a dict again instead of csv; ideally also store the commit hash
+            prevFiles = currentFiles
+
 
 ################################################################################
+
+
 # main:
-#TODO: paths!!!
 if (len(sys.argv) > 1):
     pathToRecords = sys.argv[1]
     
-    records = []
     for filename in getListOfFiles(pathToRecords):
         records = [eval(line) for line in open(filename)]
         validateHashes(records)
@@ -140,14 +213,14 @@ if (len(sys.argv) > 1):
 
 
 
-    fullRecord = buildFullRecord(pathToRecords, "/home/cip/2015/yb90ifym/clang-hash/build/muslHashes/buildTimes_musl.info")
+    fullRecord = buildFullRecord()
 
     makeBuildTimeGraph(fullRecord)
+    makeChangesGraph(fullRecord)
 
-
-#    f = open("/home/cip/2015/yb90ifym/clang-hash/build/muslHashes/fullRecord.info", 'a')
-#    f.write(repr(fullRecord) + "\n")
-#    f.close()
+    f = open(pathToRecords + "/../fullRecord.info", 'w')
+    f.write(repr(fullRecord) + "\n")
+    f.close()
 
 
 else:
-- 
GitLab