Commit 1cdd593c authored by Ludwig Fueracker's avatar Ludwig Fueracker
Browse files

corrected different hash counts

parent 9defe951
Pipeline #3075 passed with stage
in 0 seconds
...@@ -99,9 +99,14 @@ sum_of_times = {'parsing': 0, ...@@ -99,9 +99,14 @@ sum_of_times = {'parsing': 0,
'compiling': 0} 'compiling': 0}
false_positive_records = {} # maps commitID -> set of source filenames of false positive records false_positive_records = {} # maps commitID -> set of source filenames of false positive records
all_ast_hashes = {} # maps commitID -> set(ast hashes)
all_obj_hashes = {} # maps commitID -> set(obj hashes)
all_files_hash_contained = {} # maps commitID -> set(filename), to prevent multiple hashes from the same file in one commit to be counted twice
defect_commits = 0 defect_commits = 0
commit_ids = set() commit_ids = set()
def validate_records(): def validate_records():
total_number_of_records = 0 total_number_of_records = 0
for filename in get_list_of_files(PATH_TO_RECORDS): for filename in get_list_of_files(PATH_TO_RECORDS):
...@@ -145,6 +150,20 @@ def validate_records(): ...@@ -145,6 +150,20 @@ def validate_records():
for k in sorted(false_positive_records, key=lambda k: len(false_positive_records[k]), reverse=True): for k in sorted(false_positive_records, key=lambda k: len(false_positive_records[k]), reverse=True):
print "\t%s: %d" % (k, len(false_positive_records[k])) print "\t%s: %d" % (k, len(false_positive_records[k]))
print "-----------------\n" print "-----------------\n"
diff_ast_hashes = 0
for v in all_ast_hashes.values():
diff_ast_hashes += len(v)
diff_obj_hashes = 0
for v in all_obj_hashes.values():
diff_obj_hashes += len(v)
print ">>> corrected >>>"
print "different AST hashes: %d" % diff_ast_hashes
print "different obj hashes: %d" % diff_obj_hashes
print "<<<<<<<<<<<<<<<<<"
write_to_csv([ [k,len(v)] for k,v in ast_hashes_dict.items() ], ['filename', 'nr of different hashes'], abs_path('different_ast_hashes_per_file.csv')) write_to_csv([ [k,len(v)] for k,v in ast_hashes_dict.items() ], ['filename', 'nr of different hashes'], abs_path('different_ast_hashes_per_file.csv'))
write_to_csv([ [k,len(v)] for k,v in obj_hashes_dict.items() ], ['filename', 'nr of different hashes'], abs_path('different_obj_hashes_per_file.csv')) write_to_csv([ [k,len(v)] for k,v in obj_hashes_dict.items() ], ['filename', 'nr of different hashes'], abs_path('different_obj_hashes_per_file.csv'))
...@@ -154,7 +173,6 @@ def print_hash_info(message, prev_record, record, is_error=True): ...@@ -154,7 +173,6 @@ def print_hash_info(message, prev_record, record, is_error=True):
print "%s: file %s, commits %s to %s : %s" % ("ERROR" if is_error else "INFO", record['filename'], prev_record['commit-hash'], record['commit-hash'], message) print "%s: file %s, commits %s to %s : %s" % ("ERROR" if is_error else "INFO", record['filename'], prev_record['commit-hash'], record['commit-hash'], message)
def validate_hashes(record_list): def validate_hashes(record_list):
''' All records in the list must be from the same object file''' ''' All records in the list must be from the same object file'''
#TODO: collect data from all files before validating (changing paths src and crt) and sort #TODO: collect data from all files before validating (changing paths src and crt) and sort
...@@ -165,6 +183,9 @@ def validate_hashes(record_list): ...@@ -165,6 +183,9 @@ def validate_hashes(record_list):
global ast_hashes_set, obj_hashes_set global ast_hashes_set, obj_hashes_set
global sum_of_times, nr_of_records global sum_of_times, nr_of_records
global unchanged_counter global unchanged_counter
global all_ast_hashes, all_obj_hashes, all_files_hash_contained
iter_records = iter(record_list) iter_records = iter(record_list)
prev_record = next(iter_records) prev_record = next(iter_records)
filename = prev_record['filename'] filename = prev_record['filename']
...@@ -175,6 +196,8 @@ def validate_hashes(record_list): ...@@ -175,6 +196,8 @@ def validate_hashes(record_list):
ast_hash_missing += 1 ast_hash_missing += 1
return return
# the different hashes of the current file # the different hashes of the current file
ast_hashes = set() ast_hashes = set()
obj_hashes = set() obj_hashes = set()
...@@ -184,6 +207,21 @@ def validate_hashes(record_list): ...@@ -184,6 +207,21 @@ def validate_hashes(record_list):
ast_hashes_set.add(prev_record['ast-hash']) ast_hashes_set.add(prev_record['ast-hash'])
obj_hashes_set.add(prev_record['object-hash']) obj_hashes_set.add(prev_record['object-hash'])
# setup for counting different ast/obj hashes:
commit_hash = prev_record['commit-hash']
if commit_hash not in all_ast_hashes:
all_ast_hashes[commit_hash] = set()
if commit_hash not in all_obj_hashes:
all_obj_hashes[commit_hash] = set()
if commit_hash not in all_files_hash_contained:
all_files_hash_contained[commit_hash] = set()
if filename not in all_files_hash_contained[commit_hash]:
all_ast_hashes[commit_hash].add(prev_record['ast-hash'])
all_obj_hashes[commit_hash].add(prev_record['object-hash'])
all_files_hash_contained[commit_hash].add(filename)
commit_ids.add(prev_record['commit-hash']) commit_ids.add(prev_record['commit-hash'])
nr_of_records += 1 nr_of_records += 1
...@@ -213,14 +251,30 @@ def validate_hashes(record_list): ...@@ -213,14 +251,30 @@ def validate_hashes(record_list):
else: else:
unchanged_counter += 1 unchanged_counter += 1
# if prev_record['ast-hash'] != record['ast-hash']:
ast_hashes.add(record['ast-hash']) ast_hashes.add(record['ast-hash'])
# if prev_record['object-hash'] != record['object-hash']:
obj_hashes.add(record['object-hash']) obj_hashes.add(record['object-hash'])
ast_hashes_set.add(record['ast-hash']) ast_hashes_set.add(record['ast-hash'])
obj_hashes_set.add(record['object-hash']) obj_hashes_set.add(record['object-hash'])
# now the correct approach:
if commit_hash not in all_ast_hashes:
all_ast_hashes[commit_hash] = set()
if commit_hash not in all_obj_hashes:
all_obj_hashes[commit_hash] = set()
if commit_hash not in all_files_hash_contained:
all_files_hash_contained[commit_hash] = set()
if filename not in all_files_hash_contained[commit_hash]:
if prev_record['ast-hash'] != record['ast-hash']:
all_ast_hashes[commit_hash].add(record['ast-hash'])
all_files_hash_contained[commit_hash].add(filename)
if prev_record['object-hash'] != record['object-hash']:
all_obj_hashes[commit_hash].add(record['object-hash'])
all_files_hash_contained[commit_hash].add(filename)
nr_of_records += 1 nr_of_records += 1
sum_of_times['parsing'] += record['parse-duration'] sum_of_times['parsing'] += record['parse-duration']
sum_of_times['hashing'] += record['hash-duration'] sum_of_times['hashing'] += record['hash-duration']
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment