diff --git a/validate_hashes.py b/validate_hashes.py index f20141745eaa89193bd15cce009f1c246dac7236..eda9832b2f77eade43e6280025427b056e072689 100755 --- a/validate_hashes.py +++ b/validate_hashes.py @@ -5,6 +5,12 @@ import os import sys from operator import itemgetter +FULLRECORDFILENAME = "/../fullRecord.info" +COMMITINFOFILENAME = "/../commitInfo_musl.info" +#TODO: define all filenames here +pathToRecords = "" # gets set to command line parameter + + def getListOfFiles(directory): for root, dirnames, filenames in os.walk(directory): @@ -15,18 +21,29 @@ def getListOfFiles(directory): def printHashInfo(message, prevRecord, record, isError=True): print "%s: file %s, commits %s to %s : %s" % ("ERROR" if isError else "INFO", record['filename'], prevRecord['commit-hash'], record['commit-hash'], message) - + + errorCount = 0 astDifferObjSameCount = 0 missingCount = 0 + +def validateRecords(): + for filename in getListOfFiles(pathToRecords): + records = [eval(line) for line in open(filename)] + validateHashes(records) + print "Errors: %d, Infos: %d, Missing: %d" % (errorCount, astDifferObjSameCount, missingCount) + + def validateHashes(recordList): + #TODO: also sort this, perhaps execute on fullRecords or check against sorted commitIDs global errorCount, astDifferObjSameCount, missingCount #TODO: this method assumes that all records are from the same object file recordList.reverse() # glaube die sind im moment falsch herum sortiert (neuester als erstes) iterRecords = iter(recordList) prevRecord = next(iterRecords) filename = prevRecord['filename'] + if 'ast-hash' not in prevRecord.keys(): #print "MISSING: no ast-hash in records for file " + filename missingCount += 1 @@ -40,10 +57,12 @@ def validateHashes(recordList): print "ERROR: stopping validating for file %s; no ast-hash available for commit %s" % (filename, record['commit-hash']) break - if prevRecord['ast-hash'] == record['ast-hash']: - if prevRecord['object-hash'] != record['object-hash']: - printHashInfo("ast hashes same, object hashes differ", prevRecord, record) - errorCount += 1 +# if prevRecord['ast-hash'] == record['ast-hash']: +# if prevRecord['object-hash'] != record['object-hash']: +# printHashInfo("ast hashes same, object hashes differ", prevRecord, record) +# errorCount += 1 + #TODO: deckt nicht das obere if auch alle faelle vom unteren mit ab? + #TODO: refactoren, das obere kann man weglassen... if prevRecord['object-hash'] != record['object-hash']: if prevRecord['ast-hash'] == record['ast-hash']: printHashInfo("object hashes differ, ast hashes same", prevRecord, record) @@ -65,27 +84,27 @@ def validateHashes(recordList): # ################################################################################ -pathToRecords = "" # gets set to command line parameter - -################################################################################ def buildFullRecord(): '''Builds a complete record from all the single hash records. The records are grouped by the commitIDs''' fullRecord = {} - with open(pathToRecords + "/../buildTimes_musl.info", 'r') as buildTimesFile: - buildTimes = eval(buildTimesFile.read()) - for commitID in buildTimes: + with open(pathToRecords + COMMITINFOFILENAME, 'r') as commitInfoFile: + commitInfo = eval(commitInfoFile.read()) + #TODO: replace fillRecord[commitID] und commitInfo[commitID] with local vars/refs + for commitID in commitInfo: fullRecord[commitID] = {} - fullRecord[commitID]['build-time'] = buildTimes[commitID] + fullRecord[commitID]['commit-time'] = commitInfo[commitID]['commit-time'] + fullRecord[commitID]['build-time'] = commitInfo[commitID]['build-time'] fullRecord[commitID]['files'] = {} + fullRecord[commitID]['filesChanged'] = commitInfo[commitID]['filesChanged'] - #TODO: sort entries by time and store them sorted - # have to sort commits somehow - # => sort by time (start with oldest) - # atm already sorted, but not if parallelized - + if 'insertions' in commitInfo[commitID]: + fullRecord[commitID]['insertions'] = commitInfo[commitID]['insertions'] + if 'deletions' in commitInfo[commitID]: + fullRecord[commitID]['deletions'] = commitInfo[commitID]['deletions'] + for recordFilename in getListOfFiles(pathToRecords): for line in open(recordFilename): data = eval(line) @@ -97,15 +116,21 @@ def buildFullRecord(): return fullRecord + +def getSortedCommitIDList(fullRecord): + return sorted(fullRecord, key=lambda x: (fullRecord[x]['commit-time'])) + + ################################################################################ def makeBuildTimeGraph(fullRecord): commitNr = 0; - iterCommits = iter(fullRecord) + sortedCommitIDs = getSortedCommitIDList(fullRecord) + iterCommits = iter(sortedCommitIDs) prevCommit = fullRecord[next(iterCommits)] with open(pathToRecords + "/../times.csv", 'w') as f_times: - f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % ("commitNr", "buildTime", "optimalBuildTime", "astHashBuildTime", "compileTimeOnly", "withoutCompileTime", "totalParsingTime", "totalHashingTime")) + f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % ("commitHash", "buildTime", "optimalBuildTime", "astHashBuildTime", "compileTimeOnly", "withoutCompileTime", "totalParsingTime", "totalHashingTime")) for commitID in iterCommits: currentCommit = fullRecord[commitID] @@ -118,7 +143,6 @@ def makeBuildTimeGraph(fullRecord): for filename in currentFiles: if 'ast-hash' not in currentFiles[filename].keys(): - #TODO: counter? break currentRecord = currentFiles[filename] prevRecord = prevFiles[filename] @@ -135,7 +159,7 @@ def makeBuildTimeGraph(fullRecord): optimalBuildTime = buildTime - totalOptimalRedundantCompileTime # = buildTime - sum(compileTime(file) if objhash(file) unchanged) astHashBuildTime = buildTime - totalASTHashRedundantCompileTime # = buildTime - sum(compileTime(file) if asthash(file) unchanged) - f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % (commitNr, buildTime, optimalBuildTime, astHashBuildTime, compileTimeOnly, buildTime - compileTimeOnly, totalParsingTime, buildTime - compileTimeOnly - totalParsingTime)) + f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % (commitID, buildTime, optimalBuildTime, astHashBuildTime, compileTimeOnly, buildTime - compileTimeOnly, totalParsingTime, buildTime - compileTimeOnly - totalParsingTime)) commitNr += 1 prevCommit = currentCommit @@ -145,11 +169,12 @@ def makeBuildTimeGraph(fullRecord): def makeChangesGraph(fullRecord): commitNr = 0; - iterCommits = iter(fullRecord) + sortedCommitIDs = getSortedCommitIDList(fullRecord) + iterCommits = iter(sortedCommitIDs) prevCommit = fullRecord[next(iterCommits)] with open(pathToRecords + "/../changes.csv", 'w') as f_changes: - f_changes.write("%s;%s;%s;%s\n" % ("commitNr", "differentAstHash", "differentObjHash", "same")) + f_changes.write("%s;%s;%s;%s\n" % ("commitHash", "differentAstHash", "differentObjHash", "same")) for commitID in iterCommits: currentCommit = fullRecord[commitID] @@ -158,31 +183,23 @@ def makeChangesGraph(fullRecord): same = 0 differentAstHash = 0 differentObjHash = 0 + print commitID - - print currentFiles['testfile.c'] for filename in currentFiles: - if filename == 'testfile.c': - print "found testfile.c" - - for filename in currentFiles: - if filename == 'testfile.c': - print "found testfile.c (2)" if 'ast-hash' not in currentFiles[filename].keys(): - print "ast-hash not in keys of file " + filename + if filename[-2:] != '.s': + print "ast-hash not in keys of file " + filename continue currentRecord = currentFiles[filename] - prevRecord = prevFiles[filename] - -# if prevRecord['object-hash'] == currentRecord['object-hash'] or prevRecord['ast-hash'] == currentRecord['ast-hash']: -# test = 0 -# else: - if filename == 'testfile.c': - print prevRecord['object-hash'] - print currentRecord['object-hash'] - print prevRecord['ast-hash'] - print currentRecord['ast-hash'] - print '\n' + if filename not in prevFiles: + if 'src/' + filename in prevFiles: + print "file %s changed place to src/" % filename + prevRecord = prevFiles['src/' + filename] + else: + print "ERROR, MISSING FILE: %s not in prev, going on to next commit" % filename + continue + else: + prevRecord = prevFiles[filename] if prevRecord['object-hash'] != currentRecord['object-hash']: differentObjHash += 1 @@ -192,9 +209,8 @@ def makeChangesGraph(fullRecord): else: same += 1 - f_changes.write("%s;%s;%s;%s\n" % (commitNr, differentAstHash, differentObjHash, same)) - #TODO: nicht als csv, sondern auch wieder als dict speichern! - #ausserdem am besten auch den commit-hash mitspeichern + f_changes.write("%s;%s;%s;%s\n" % (commitID, differentAstHash, differentObjHash, same)) + #TODO: don't save as csv, but also as dict! commitNr += 1 prevCommit = currentCommit @@ -205,23 +221,26 @@ def makeChangesGraph(fullRecord): # main: if (len(sys.argv) > 1): pathToRecords = sys.argv[1] - - for filename in getListOfFiles(pathToRecords): - records = [eval(line) for line in open(filename)] - validateHashes(records) - print "Errors: %d, Infos: %d, Missing: %d" % (errorCount, astDifferObjSameCount, missingCount) + pathToFullRecordFile = pathToRecords + FULLRECORDFILENAME + validateRecords() - - fullRecord = buildFullRecord() + fullRecord = {} + if os.path.isfile(pathToFullRecordFile): + with open(pathToFullRecordFile, 'r') as fullRecordFile: + fullRecord = eval(fullRecordFile.read()) + print "read full record from " + pathToFullRecordFile + else: + fullRecord = buildFullRecord() + f = open(pathToFullRecordFile, 'w') + f.write(repr(fullRecord) + "\n") + f.close() + print "built full record, wrote to" + pathToFullRecordFile makeBuildTimeGraph(fullRecord) + print "finished BuildTimeGraph" makeChangesGraph(fullRecord) - - f = open(pathToRecords + "/../fullRecord.info", 'w') - f.write(repr(fullRecord) + "\n") - f.close() - + print "finished ChangesGraph" else: print "Missing path to record files.\nUsage:\n\t%s pathToRecords" % sys.argv[0]