Commit f4144178 authored by Ludwig Fueracker's avatar Ludwig Fueracker
Browse files

renamed stuff, added outputs

parent 25f84c14
...@@ -16,112 +16,111 @@ PATH_TO_RECORDS = '' # gets set from command line parameter ...@@ -16,112 +16,111 @@ PATH_TO_RECORDS = '' # gets set from command line parameter
# data/record filenames
INFO_EXTENSION = '.info'
FULL_RECORD_FILENAME = 'full_record' + INFO_EXTENSION
COMMIT_INFO_FILENAME = 'buildInfo_musl_with_stop' + INFO_EXTENSION
# output files for the aggregated per-build timing data
BUILD_TIME_DATA_FILENAME = 'total_build_times.csv'
BUILD_TIME_FILENAME = 'total_build_times.pdf'
# column order of the CSV written by make_graphs()
BUILD_TIME_DATA_HEADER = [
    'total_parse_times',
    'total_hash_times',
    'total_compile_times',
    'diff_to_build_times',
    'total_build_times',
]
def abs_path(filename):
    """Prepends the absolute path to the filename."""
    # PATH_TO_RECORDS points at the record directory; the data/plot files
    # live one level above it.
    return '%s/../%s' % (PATH_TO_RECORDS, filename)
def get_list_of_files(directory):
    """Yield the path of every '*.info' record file below *directory*."""
    pattern = '*' + INFO_EXTENSION
    for root, _dirnames, filenames in os.walk(directory):
        for name in fnmatch.filter(filenames, pattern):
            yield os.path.join(root, name)
################################################################################
#
#
################################################################################

def build_key_translation_dict():
    """Build a bidirectional mapping between record key names and numeric ids.

    The returned dict contains both directions: name -> number and
    number -> name, so a single lookup table serves encoding and decoding.
    """
    key_translation_to_nr = {
        'start-time': 0,
        'hash-start-time': 1,
        'object-hash': 2,
        'return-code': 3,
        'parse-duration': 4,
        'object-file-size': 5,
        'processed-bytes': 6,
        'hash-duration': 7,
        'filename': 8,
        'project': 9,
        'compile-duration': 10,  # time the compiler was running (incl. parse-duration)
        'ast-hash': 11,
        'commit-hash': 12,
        'element-hashes': 13,
        'commit-time': 14,
        'build-time': 15,  # time the 'make -jx' command took, times x
        'files': 16,
        'files-changed': 17,
        'insertions': 18,
        'deletions': 19,
        'run_id': 20
    }
    key_translation_from_nr = {v: k for k, v in key_translation_to_nr.items()}
    key_translation_dict = key_translation_to_nr.copy()
    key_translation_dict.update(key_translation_from_nr)
    return key_translation_dict

key_translation = build_key_translation_dict()
def tr(key):
    """Translate a record key (both directions: name -> number, number -> name)."""
    return key_translation[key]
def build_full_record_to(path_to_full_record_file):
    """structure of full record:
    {commitID: {'build-time': time, files: {filename: {record}, filename: {record}}}}

    Builds the full record and, if DO_PRINT_RECORDS is set, also dumps its
    repr() to *path_to_full_record_file*.
    """
    full_record = build_full_record()
    if DO_PRINT_RECORDS:
        record_file = open(path_to_full_record_file, 'w')
        try:
            record_file.write(repr(full_record) + "\n")
        except MemoryError as me:
            # repr() of the whole record can be huge; report and re-raise
            print(me)
            raise
        finally:
            # timestamp is printed before the file is closed, as before
            print(time.ctime())
            record_file.close()
        print("built full record, wrote to " + path_to_full_record_file)
    return full_record
def buildFullRecord(): def build_full_record():
"""Builds a complete record from all the single hash records. """Builds a complete record from all the single hash records.
The records are grouped by the commitIDs The records are grouped by the commitIDs
""" """
fullRecord = {} full_record = {}
with open(abs_path(COMMIT_INFO_FILENAME), 'r') as commitInfoFile: with open(abs_path(COMMIT_INFO_FILENAME), 'r') as commit_infoFile:
commitInfo = eval(commitInfoFile.read()) commit_info = eval(commit_infoFile.read())
for run_id in commitInfo: for run_id in commit_info:
if not isinstance(run_id, int): # dict also contains key 'commit-hash' if not isinstance(run_id, int): # dict also contains key 'commit-hash'
continue; continue;
currentRecord = {} current_record = {}
currentRecord[tr('filename')] = commitInfo[run_id]['filename'] current_record[tr('filename')] = commit_info[run_id]['filename']
currentRecord[tr('build-time')] = commitInfo[run_id]['build-time'] current_record[tr('build-time')] = commit_info[run_id]['build-time']
currentRecord[tr('files')] = {} current_record[tr('files')] = {}
fullRecord[run_id] = currentRecord full_record[run_id] = current_record
for recordFilename in getListOfFiles(PATH_TO_RECORDS): for record_filename in get_list_of_files(PATH_TO_RECORDS):
for line in open(recordFilename): for line in open(record_filename):
data = eval(line) data = eval(line)
# commitID = data['commit-hash'] # commitID = data['commit-hash']
# del data['commit-hash'] # del data['commit-hash']
objFilename = data['obj-file'] obj_filename = data['obj-file']
del data['obj-file'] del data['obj-file']
...@@ -134,115 +133,124 @@ def buildFullRecord(): ...@@ -134,115 +133,124 @@ def buildFullRecord():
run_id = data['run_id'] run_id = data['run_id']
dataNewKeys = {tr(k): v for k, v in data.items()} data_new_keys = {tr(k): v for k, v in data.items()}
fullRecord[run_id][tr('files')][objFilename] = dataNewKeys full_record[run_id][tr('files')][obj_filename] = data_new_keys
return fullRecord return full_record
################################################################################ ################################################################################
def write_to_csv(data, column_names, filename):
    """Write *data* rows to *filename* as CSV, preceded by a header row."""
    with open(filename, "w") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(column_names)
        writer.writerows(data)
def print_avg(data, name):
    """Print the arithmetic mean of *data*, labelled with *name*."""
    average = sum(data) / float(len(data))
    print('avg %s: %f' % (name, average))
################################################################################

# stacked-plot segment colors: parse=yellow, hash=red, compile=blue, remainder=green
parse_color, hash_color, compile_color, remain_color = (
    '#FFFF66', '#FF0000', '#3399FF', '#008800')
def plot_build_time_graph1(data):
    """Unpack the four time series from *data* and plot their composition."""
    parse_times = data[0]
    hash_times = data[1]
    compile_times = data[2]
    remainder_times = data[3]
    plot_build_time_composition_graph(parse_times, hash_times, compile_times, remainder_times)
def plot_build_time_composition_graph(parse_times, hash_times, compile_times, diff_to_build_time):  # times in ms
    """Save a stacked per-commit build-time composition plot (log y scale)
    to BUILD_TIME_FILENAME and print the per-phase averages.
    """
    fig, ax = plt.subplots()
    ax.stackplot(np.arange(1, len(parse_times) + 1),  # x axis
                 [parse_times, hash_times, compile_times,
                  #diff_to_build_time
                 ], colors=[parse_color, hash_color, compile_color,
                  # remain_color
                 ], edgecolor='none')
    plt.xlim(1, len(parse_times))
    plt.xlabel('commits')
    plt.ylabel('time [s]')
    ax.set_yscale('log')
    # legend patches are listed top-of-stack first
    lgd = ax.legend([#mpatches.Patch(color=remain_color),
                     mpatches.Patch(color=compile_color),
                     mpatches.Patch(color=hash_color),
                     mpatches.Patch(color=parse_color)],
                    [#'remaining build time',
                     'compile time', 'hash time', 'parse time'],
                    loc='center left', bbox_to_anchor=(1, 0.5))
    fig.savefig(abs_path(BUILD_TIME_FILENAME), bbox_extra_artists=(lgd,), bbox_inches='tight')
    print("\n-----------------")
    print("average total times per build:")
    print_avg(parse_times, 'parse')
    print_avg(hash_times, 'hash')
    print_avg(compile_times, 'compile')
    print_avg(diff_to_build_time, 'remainder')
    print("")
    print("average times if header/source file touched")
    print("-----------------\n")
################################################################################ ################################################################################
def make_graphs(full_record):
    """Aggregate per-run timings from *full_record*, dump them to CSV and
    plot the build-time composition graph.
    """
    # data for build time graphs
    total_parse_times = []
    total_hash_times = []
    total_compile_times = []
    total_build_times = []
    diff_to_build_times = []
    # collected but not yet used anywhere below -- kept for upcoming graphs
    parse_times_header_touched = []
    parse_times_source_touched = []

    # fresh_build_record = full_record[0]
    for run_id in full_record:
        if run_id < 2:  # skip fresh build (and also 1st, seems to be buggy...)
            continue
        current_record = full_record[run_id]
        current_files = current_record[tr('files')]
        files_changed = len(current_files)  # count changed files per run #TODO!
        print(current_record[tr('filename')])

        total_parse_duration = 0
        total_hash_duration = 0
        total_compile_duration = 0
        for filename in current_files:  # deal with first commit
            # if tr('ast-hash') not in current_files[filename].keys():
            #     print "error: missing AST hash for file %s" % filename
            #     continue
            current_file_record = current_files[filename]
            total_parse_duration += current_file_record[tr('parse-duration')]
            total_hash_duration += current_file_record[tr('hash-duration')]
            total_compile_duration += current_file_record[tr('compile-duration')]

        # if total_parse_duration == 0:# or (total_compile_duration/1e6) > 500000:
        #     continue
        total_parse_times.append(total_parse_duration / 1e6)  # nano to milli
        total_hash_times.append(total_hash_duration / 1e6)
        total_compile_times.append(total_compile_duration / 1e6)
        build_time = current_record[tr('build-time')]
        total_build_times.append(build_time / 1e6)
        diff_to_build_times.append((build_time - total_parse_duration - total_hash_duration - total_compile_duration) / 1e6)

        print('run_id %d, #files_changed: %d' % (run_id, files_changed))

    print_avg(total_build_times, 'total')

    # save data to csv files
    build_time_data = np.column_stack((total_parse_times, total_hash_times, total_compile_times, diff_to_build_times, total_build_times))
    write_to_csv(build_time_data, BUILD_TIME_DATA_HEADER, abs_path(BUILD_TIME_DATA_FILENAME))

    plot_build_time_composition_graph(total_parse_times, total_hash_times, total_compile_times, diff_to_build_times)
################################################################################ ################################################################################
"""functions for reading data from the csv files to skip full record building""" """functions for reading data from the csv files to skip full record building"""
...@@ -250,7 +258,7 @@ def makeGraphs(fullRecord): ...@@ -250,7 +258,7 @@ def makeGraphs(fullRecord):
def csv_files_are_existing():
    """Return True if the build-time CSV data file has already been written."""
    data_file = abs_path(BUILD_TIME_DATA_FILENAME)
    return os.path.isfile(data_file)
def read_from_csv(filename, columnNames): def read_from_csv(filename, column_names):
data = [] data = []
with open(filename) as csv_file: with open(filename) as csv_file:
reader = csv.reader(csv_file) reader = csv.reader(csv_file)
...@@ -291,10 +299,10 @@ if (len(sys.argv) > 1): ...@@ -291,10 +299,10 @@ if (len(sys.argv) > 1):
# read_csv_data_and_plot_graphs() # read_csv_data_and_plot_graphs()
# print "finished graphs at %s" % time.ctime() # print "finished graphs at %s" % time.ctime()
# else: # else:
full_record = buildFullRecordTo(path_to_full_record_file) full_record = build_full_record_to(path_to_full_record_file)
print "finished building/loading full record at %s" % time.ctime() print "finished building/loading full record at %s" % time.ctime()
makeGraphs(full_record) make_graphs(full_record)
print "finished graphs at %s" % time.ctime() print "finished graphs at %s" % time.ctime()
print "Finished at %s" % time.ctime() print "Finished at %s" % time.ctime()
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment