From 2389c8e083ae3332722dcb8f262649eb88a9859b Mon Sep 17 00:00:00 2001 From: Ludwig Fueracker <ludwig.fueracker@fau.de> Date: Tue, 2 Aug 2016 18:08:41 +0200 Subject: [PATCH] more timing outputs and added change counting -> contains bug --- validate_hashes.py | 167 ++++++++++++++++++++++++++++++++------------- 1 file changed, 120 insertions(+), 47 deletions(-) diff --git a/validate_hashes.py b/validate_hashes.py index a169426..3b4eec1 100755 --- a/validate_hashes.py +++ b/validate_hashes.py @@ -20,18 +20,26 @@ errorCount = 0 astDifferObjSameCount = 0 missingCount = 0 -def validateHashes(records): +def validateHashes(recordList): global errorCount, astDifferObjSameCount, missingCount #TODO: this method assumes that all records are from the same object file - - iterRecords = iter(records) + recordList.reverse() # glaube die sind im moment falsch herum sortiert (neuester als erstes) + iterRecords = iter(recordList) prevRecord = next(iterRecords) filename = prevRecord['filename'] if 'ast-hash' not in prevRecord.keys(): - print "MISSING: no ast-hash in records for file " + filename + #print "MISSING: no ast-hash in records for file " + filename missingCount += 1 return + for record in iterRecords: +#TODO if prevRecord['start-time'] > record['start-time']: +#TODO print "Error: wrong order of records" + #TODO!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + if 'ast-hash' not in record.keys() or 'object-hash' not in record.keys(): + print "ERROR: stopping validating for file %s; no ast-hash available for commit %s" % (filename, record['commit-hash']) + break + if prevRecord['ast-hash'] == record['ast-hash']: if prevRecord['object-hash'] != record['object-hash']: printHashInfo("ast hashes same, object hashes differ", prevRecord, record) @@ -41,7 +49,7 @@ def validateHashes(records): printHashInfo("object hashes differ, ast hashes same", prevRecord, record) errorCount += 1 elif prevRecord['ast-hash'] != record['ast-hash']: - printHashInfo("ast hashes differ, object hashes same", prevRecord, record, False) + #printHashInfo("ast hashes differ, object hashes same", prevRecord, record, False) astDifferObjSameCount += 1 prevRecord = record @@ -57,13 +65,16 @@ def validateHashes(records): # ################################################################################ +pathToRecords = "" # gets set to command line parameter -def buildFullRecord(pathToRecords, pathToBuildTimes): +################################################################################ + +def buildFullRecord(): '''Builds a complete record from all the single hash records. The records are grouped by the commitIDs''' fullRecord = {} - with open(pathToBuildTimes, 'r') as buildTimesFile: + with open(pathToRecords + "/../buildTimes_musl.info", 'r') as buildTimesFile: buildTimes = eval(buildTimesFile.read()) for commitID in buildTimes: fullRecord[commitID] = {} @@ -73,7 +84,7 @@ def buildFullRecord(pathToRecords, pathToBuildTimes): #TODO: sort entries by time and store them sorted # have to sort commits somehow # => sort by time (start with oldest) - + # atm already sorted, but not if parallelized for recordFilename in getListOfFiles(pathToRecords): for line in open(recordFilename): @@ -93,46 +104,108 @@ def makeBuildTimeGraph(fullRecord): iterCommits = iter(fullRecord) prevCommit = fullRecord[next(iterCommits)] - f1 = open("/home/cip/2015/yb90ifym/clang-hash/build/muslHashes/times.csv", 'a') - - for commitID in iterCommits: - currentCommit = fullRecord[commitID] - totalOptimalRedundantCompileTime = 0 # ns - totalASTHashRedundantCompileTime = 0 # ns - currentFiles = currentCommit['files'] - prevFiles = prevCommit['files'] - for filename in currentFiles: - if 'ast-hash' not in currentFiles[filename].keys(): - #TODO: counter? - break - - currentRecord = currentFiles[filename] - prevRecord = prevFiles[filename] - - if prevRecord['object-hash'] == currentRecord['object-hash']: - totalOptimalRedundantCompileTime += currentRecord['compile-duration'] - if prevRecord['ast-hash'] == currentRecord['ast-hash']: - totalASTHashRedundantCompileTime += currentRecord['compile-duration'] - - buildTime = currentCommit['build-time'] # ns - optimalBuildTime = buildTime - totalOptimalRedundantCompileTime # = buildTime - sum(compileTime(file) if objhash(file) unchanged) - astHashBuildTime = buildTime - totalASTHashRedundantCompileTime # = buildTime - sum(compileTime(file) if asthash(file) unchanged) - - f1.write("%s;%s;%s;%s\n" % (commitNr, buildTime, optimalBuildTime, astHashBuildTime)) - - commitNr += 1 - prevCommit = currentCommit - - - f1.close() + with open(pathToRecords + "/../times.csv", 'w') as f_times: + f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % ("commitNr", "buildTime", "optimalBuildTime", "astHashBuildTime", "compileTimeOnly", "withoutCompileTime", "totalParsingTime", "totalHashingTime")) + + for commitID in iterCommits: + currentCommit = fullRecord[commitID] + totalOptimalRedundantCompileTime = 0 # ns + totalASTHashRedundantCompileTime = 0 # ns + currentFiles = currentCommit['files'] + prevFiles = prevCommit['files'] + compileTimeOnly = 0 # ns + totalParsingTime = 0 # ns + + for filename in currentFiles: + if 'ast-hash' not in currentFiles[filename].keys(): + #TODO: counter? + break + currentRecord = currentFiles[filename] + prevRecord = prevFiles[filename] + + compileTimeOnly += currentRecord['compile-duration'] # ns + totalParsingTime += currentRecord['parse-duration'] # ns + + if prevRecord['object-hash'] == currentRecord['object-hash']: + totalOptimalRedundantCompileTime += currentRecord['compile-duration'] # ns + if prevRecord['ast-hash'] == currentRecord['ast-hash']: + totalASTHashRedundantCompileTime += currentRecord['compile-duration'] # ns + + buildTime = currentCommit['build-time'] # ns + optimalBuildTime = buildTime - totalOptimalRedundantCompileTime # = buildTime - sum(compileTime(file) if objhash(file) unchanged) + astHashBuildTime = buildTime - totalASTHashRedundantCompileTime # = buildTime - sum(compileTime(file) if asthash(file) unchanged) + + f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % (commitNr, buildTime, optimalBuildTime, astHashBuildTime, compileTimeOnly, buildTime - compileTimeOnly, totalParsingTime, buildTime - compileTimeOnly - totalParsingTime)) + + commitNr += 1 + prevCommit = currentCommit + + +################################################################################ + +def makeChangesGraph(fullRecord): + commitNr = 0; + iterCommits = iter(fullRecord) + prevCommit = fullRecord[next(iterCommits)] + + with open(pathToRecords + "/../changes.csv", 'w') as f_changes: + f_changes.write("%s;%s;%s;%s\n" % ("commitNr", "differentAstHash", "differentObjHash", "same")) + + for commitID in iterCommits: + currentCommit = fullRecord[commitID] + currentFiles = currentCommit['files'] + prevFiles = prevCommit['files'] + same = 0 + differentAstHash = 0 + differentObjHash = 0 + + + print currentFiles['testfile.c'] + for filename in currentFiles: + if filename == 'testfile.c': + print "found testfile.c" + + for filename in currentFiles: + if filename == 'testfile.c': + print "found testfile.c (2)" + if 'ast-hash' not in currentFiles[filename].keys(): + print "ast-hash not in keys of file " + filename + break + currentRecord = currentFiles[filename] + prevRecord = prevFiles[filename] + +# if prevRecord['object-hash'] == currentRecord['object-hash'] or prevRecord['ast-hash'] == currentRecord['ast-hash']: +# test = 0 +# else: + if filename == 'testfile.c': + print prevRecord['object-hash'] + print currentRecord['object-hash'] + print prevRecord['ast-hash'] + print currentRecord['ast-hash'] + print '\n' + + if prevRecord['object-hash'] != currentRecord['object-hash']: + differentObjHash += 1 + differentAstHash += 1 + elif prevRecord['ast-hash'] != currentRecord['ast-hash']: + differentAstHash += 1 + else: + same += 1 + + f_changes.write("%s;%s;%s;%s\n" % (commitNr, differentAstHash, differentObjHash, same)) + #TODO: nicht als csv, sondern auch wieder als dict speichern! + #ausserdem am besten auch den commit-hash mitspeichern + commitNr += 1 + prevCommit = currentCommit + ################################################################################ + + # main: -#TODO: paths!!! if (len(sys.argv) > 1): pathToRecords = sys.argv[1] - records = [] for filename in getListOfFiles(pathToRecords): records = [eval(line) for line in open(filename)] validateHashes(records) @@ -140,14 +213,14 @@ if (len(sys.argv) > 1): - fullRecord = buildFullRecord(pathToRecords, "/home/cip/2015/yb90ifym/clang-hash/build/muslHashes/buildTimes_musl.info") + fullRecord = buildFullRecord() makeBuildTimeGraph(fullRecord) + makeChangesGraph(fullRecord) - -# f = open("/home/cip/2015/yb90ifym/clang-hash/build/muslHashes/fullRecord.info", 'a') -# f.write(repr(fullRecord) + "\n") -# f.close() + f = open(pathToRecords + "/../fullRecord.info", 'w') + f.write(repr(fullRecord) + "\n") + f.close() else: -- GitLab