Commit 7d5fe5b1 authored by Ludwig Fueracker's avatar Ludwig Fueracker
Browse files

now analyzing in correct order of commits (order still missing in...

now analyzing in correct order of commits (order still missing in validateRecords), fixed some issues with moved files, refactored a bit
parent bc37f46e
Pipeline #2590 passed with stage
in 21 seconds
......@@ -5,6 +5,12 @@ import os
import sys
from operator import itemgetter
FULLRECORDFILENAME = "/../fullRecord.info"
COMMITINFOFILENAME = "/../commitInfo_musl.info"
#TODO: define all filenames here
pathToRecords = "" # gets set to command line parameter
def getListOfFiles(directory):
for root, dirnames, filenames in os.walk(directory):
......@@ -15,18 +21,29 @@ def getListOfFiles(directory):
def printHashInfo(message, prevRecord, record, isError=True):
print "%s: file %s, commits %s to %s : %s" % ("ERROR" if isError else "INFO", record['filename'], prevRecord['commit-hash'], record['commit-hash'], message)
errorCount = 0
astDifferObjSameCount = 0
missingCount = 0
def validateRecords():
for filename in getListOfFiles(pathToRecords):
records = [eval(line) for line in open(filename)]
validateHashes(records)
print "Errors: %d, Infos: %d, Missing: %d" % (errorCount, astDifferObjSameCount, missingCount)
def validateHashes(recordList):
#TODO: also sort this, perhaps execute on fullRecords or check against sorted commitIDs
global errorCount, astDifferObjSameCount, missingCount
#TODO: this method assumes that all records are from the same object file
recordList.reverse() # glaube die sind im moment falsch herum sortiert (neuester als erstes)
iterRecords = iter(recordList)
prevRecord = next(iterRecords)
filename = prevRecord['filename']
if 'ast-hash' not in prevRecord.keys():
#print "MISSING: no ast-hash in records for file " + filename
missingCount += 1
......@@ -40,10 +57,12 @@ def validateHashes(recordList):
print "ERROR: stopping validating for file %s; no ast-hash available for commit %s" % (filename, record['commit-hash'])
break
if prevRecord['ast-hash'] == record['ast-hash']:
if prevRecord['object-hash'] != record['object-hash']:
printHashInfo("ast hashes same, object hashes differ", prevRecord, record)
errorCount += 1
# if prevRecord['ast-hash'] == record['ast-hash']:
# if prevRecord['object-hash'] != record['object-hash']:
# printHashInfo("ast hashes same, object hashes differ", prevRecord, record)
# errorCount += 1
#TODO: deckt nicht das obere if auch alle faelle vom unteren mit ab?
#TODO: refactoren, das obere kann man weglassen...
if prevRecord['object-hash'] != record['object-hash']:
if prevRecord['ast-hash'] == record['ast-hash']:
printHashInfo("object hashes differ, ast hashes same", prevRecord, record)
......@@ -65,27 +84,27 @@ def validateHashes(recordList):
#
################################################################################
pathToRecords = "" # gets set to command line parameter
################################################################################
def buildFullRecord():
'''Builds a complete record from all the single hash records.
The records are grouped by the commitIDs'''
fullRecord = {}
with open(pathToRecords + "/../buildTimes_musl.info", 'r') as buildTimesFile:
buildTimes = eval(buildTimesFile.read())
for commitID in buildTimes:
with open(pathToRecords + COMMITINFOFILENAME, 'r') as commitInfoFile:
commitInfo = eval(commitInfoFile.read())
#TODO: replace fillRecord[commitID] und commitInfo[commitID] with local vars/refs
for commitID in commitInfo:
fullRecord[commitID] = {}
fullRecord[commitID]['build-time'] = buildTimes[commitID]
fullRecord[commitID]['commit-time'] = commitInfo[commitID]['commit-time']
fullRecord[commitID]['build-time'] = commitInfo[commitID]['build-time']
fullRecord[commitID]['files'] = {}
fullRecord[commitID]['filesChanged'] = commitInfo[commitID]['filesChanged']
#TODO: sort entries by time and store them sorted
# have to sort commits somehow
# => sort by time (start with oldest)
# atm already sorted, but not if parallelized
if 'insertions' in commitInfo[commitID]:
fullRecord[commitID]['insertions'] = commitInfo[commitID]['insertions']
if 'deletions' in commitInfo[commitID]:
fullRecord[commitID]['deletions'] = commitInfo[commitID]['deletions']
for recordFilename in getListOfFiles(pathToRecords):
for line in open(recordFilename):
data = eval(line)
......@@ -97,15 +116,21 @@ def buildFullRecord():
return fullRecord
def getSortedCommitIDList(fullRecord):
return sorted(fullRecord, key=lambda x: (fullRecord[x]['commit-time']))
################################################################################
def makeBuildTimeGraph(fullRecord):
commitNr = 0;
iterCommits = iter(fullRecord)
sortedCommitIDs = getSortedCommitIDList(fullRecord)
iterCommits = iter(sortedCommitIDs)
prevCommit = fullRecord[next(iterCommits)]
with open(pathToRecords + "/../times.csv", 'w') as f_times:
f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % ("commitNr", "buildTime", "optimalBuildTime", "astHashBuildTime", "compileTimeOnly", "withoutCompileTime", "totalParsingTime", "totalHashingTime"))
f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % ("commitHash", "buildTime", "optimalBuildTime", "astHashBuildTime", "compileTimeOnly", "withoutCompileTime", "totalParsingTime", "totalHashingTime"))
for commitID in iterCommits:
currentCommit = fullRecord[commitID]
......@@ -118,7 +143,6 @@ def makeBuildTimeGraph(fullRecord):
for filename in currentFiles:
if 'ast-hash' not in currentFiles[filename].keys():
#TODO: counter?
break
currentRecord = currentFiles[filename]
prevRecord = prevFiles[filename]
......@@ -135,7 +159,7 @@ def makeBuildTimeGraph(fullRecord):
optimalBuildTime = buildTime - totalOptimalRedundantCompileTime # = buildTime - sum(compileTime(file) if objhash(file) unchanged)
astHashBuildTime = buildTime - totalASTHashRedundantCompileTime # = buildTime - sum(compileTime(file) if asthash(file) unchanged)
f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % (commitNr, buildTime, optimalBuildTime, astHashBuildTime, compileTimeOnly, buildTime - compileTimeOnly, totalParsingTime, buildTime - compileTimeOnly - totalParsingTime))
f_times.write("%s;%s;%s;%s;%s;%s;%s;%s\n" % (commitID, buildTime, optimalBuildTime, astHashBuildTime, compileTimeOnly, buildTime - compileTimeOnly, totalParsingTime, buildTime - compileTimeOnly - totalParsingTime))
commitNr += 1
prevCommit = currentCommit
......@@ -145,11 +169,12 @@ def makeBuildTimeGraph(fullRecord):
def makeChangesGraph(fullRecord):
commitNr = 0;
iterCommits = iter(fullRecord)
sortedCommitIDs = getSortedCommitIDList(fullRecord)
iterCommits = iter(sortedCommitIDs)
prevCommit = fullRecord[next(iterCommits)]
with open(pathToRecords + "/../changes.csv", 'w') as f_changes:
f_changes.write("%s;%s;%s;%s\n" % ("commitNr", "differentAstHash", "differentObjHash", "same"))
f_changes.write("%s;%s;%s;%s\n" % ("commitHash", "differentAstHash", "differentObjHash", "same"))
for commitID in iterCommits:
currentCommit = fullRecord[commitID]
......@@ -158,31 +183,23 @@ def makeChangesGraph(fullRecord):
same = 0
differentAstHash = 0
differentObjHash = 0
print commitID
print currentFiles['testfile.c']
for filename in currentFiles:
if filename == 'testfile.c':
print "found testfile.c"
for filename in currentFiles:
if filename == 'testfile.c':
print "found testfile.c (2)"
if 'ast-hash' not in currentFiles[filename].keys():
print "ast-hash not in keys of file " + filename
if filename[-2:] != '.s':
print "ast-hash not in keys of file " + filename
continue
currentRecord = currentFiles[filename]
prevRecord = prevFiles[filename]
# if prevRecord['object-hash'] == currentRecord['object-hash'] or prevRecord['ast-hash'] == currentRecord['ast-hash']:
# test = 0
# else:
if filename == 'testfile.c':
print prevRecord['object-hash']
print currentRecord['object-hash']
print prevRecord['ast-hash']
print currentRecord['ast-hash']
print '\n'
if filename not in prevFiles:
if 'src/' + filename in prevFiles:
print "file %s changed place to src/" % filename
prevRecord = prevFiles['src/' + filename]
else:
print "ERROR, MISSING FILE: %s not in prev, going on to next commit" % filename
continue
else:
prevRecord = prevFiles[filename]
if prevRecord['object-hash'] != currentRecord['object-hash']:
differentObjHash += 1
......@@ -192,9 +209,8 @@ def makeChangesGraph(fullRecord):
else:
same += 1
f_changes.write("%s;%s;%s;%s\n" % (commitNr, differentAstHash, differentObjHash, same))
#TODO: nicht als csv, sondern auch wieder als dict speichern!
#ausserdem am besten auch den commit-hash mitspeichern
f_changes.write("%s;%s;%s;%s\n" % (commitID, differentAstHash, differentObjHash, same))
#TODO: don't save as csv, but also as dict!
commitNr += 1
prevCommit = currentCommit
......@@ -205,23 +221,26 @@ def makeChangesGraph(fullRecord):
# main:
if (len(sys.argv) > 1):
pathToRecords = sys.argv[1]
for filename in getListOfFiles(pathToRecords):
records = [eval(line) for line in open(filename)]
validateHashes(records)
print "Errors: %d, Infos: %d, Missing: %d" % (errorCount, astDifferObjSameCount, missingCount)
pathToFullRecordFile = pathToRecords + FULLRECORDFILENAME
validateRecords()
fullRecord = buildFullRecord()
fullRecord = {}
if os.path.isfile(pathToFullRecordFile):
with open(pathToFullRecordFile, 'r') as fullRecordFile:
fullRecord = eval(fullRecordFile.read())
print "read full record from " + pathToFullRecordFile
else:
fullRecord = buildFullRecord()
f = open(pathToFullRecordFile, 'w')
f.write(repr(fullRecord) + "\n")
f.close()
print "built full record, wrote to" + pathToFullRecordFile
makeBuildTimeGraph(fullRecord)
print "finished BuildTimeGraph"
makeChangesGraph(fullRecord)
f = open(pathToRecords + "/../fullRecord.info", 'w')
f.write(repr(fullRecord) + "\n")
f.close()
print "finished ChangesGraph"
else:
print "Missing path to record files.\nUsage:\n\t%s pathToRecords" % sys.argv[0]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment