Commit 7ed301c3 authored by Christian Dietrich's avatar Christian Dietrich
Browse files

objhash: different hashing of objectfiles

In some cases gcc produces syntactically different object files that
are semantically equivalent. For example, gcc uses an internal 'I
have seen a name' counter to name function-local variables that are
placed within a data section.
parent cba9f9fe
......@@ -3,8 +3,7 @@
import fnmatch
import glob
import threading
from subprocess import Popen, PIPE
import hashlib
from subprocess import Popen, PIPE, check_output
import sys
import os
import re
......@@ -16,6 +15,8 @@ default_path = ".."
default_compile_command = "clang"
default_compile_flags = "-Wall"
default_hash_command = os.path.join(os.path.dirname(__file__), "../build/wrappers/clang-hash")
default_objhash_command = os.path.join(os.path.dirname(__file__), "../build/wrappers/hash-objectfile")
LOGLEVEL = 1
LOGLEVEL_DEBUG = 2
......@@ -184,7 +185,7 @@ def run_testcase_helper(test_case, check_name, future):
basename, ext = os.path.splitext(test_case)
# Add the testcase directory
dirname = os.path.dirname(test_case)
dirname = os.path.dirname(os.path.join(".", test_case))
compile_flags += " -I %s" % (dirname)
cruft = run_testcase_cleaner(basename)
......@@ -224,15 +225,12 @@ def run_testcase_helper(test_case, check_name, future):
output += x
# Hash Object File
object_hash = hashlib.sha1()
if ret_compiler == 0:
with open(object_file) as fd:
object_hash.update(fd.read())
object_hash = check_output([default_objhash_command, object_file]).strip()
else:
output += COLOR.format("!!! Failed: compile (%s) failed with %d\n" % (variant, ret_compiler),
"red", bold=True)
failed = True
object_hash = object_hash.hexdigest()
#### Run Clang-Hash
......@@ -258,7 +256,7 @@ def run_testcase_helper(test_case, check_name, future):
# Call clang-hash
if LOGLEVEL >= LOGLEVEL_NORMAL:
output += " {source_file} {object_hash} {ast_hash}\n".format(
output += " {source_file} obj:{object_hash} ast:{ast_hash}\n".format(
source_file=source_file,
object_hash=object_hash,
ast_hash=ast_hash)
......@@ -283,8 +281,8 @@ def run_testcase_helper(test_case, check_name, future):
continue
output += COLOR.format("!!! Failure: Different hashes\n", "red", True)
output += " {var_1} {obj} {ast}\n".format(**locals())
output += " {var_2} {other_obj} {other_ast}\n".format(**locals())
output += " {var_1} obj:{obj} ast{ast}\n".format(**locals())
output += " {var_2} obj:{other_obj} ast:{other_ast}\n".format(**locals())
failed = True
......
......@@ -10,3 +10,8 @@ execute_process(
COMMAND "${CMAKE_COMMAND}" "-E" "create_symlink"
"${CMAKE_CURRENT_SOURCE_DIR}/compare-logs" "${CMAKE_CURRENT_BINARY_DIR}/compare-logs"
)
execute_process(
COMMAND "${CMAKE_COMMAND}" "-E" "create_symlink"
"${CMAKE_CURRENT_SOURCE_DIR}/hash-objectfile" "${CMAKE_CURRENT_BINARY_DIR}/hash-objectfile"
)
......@@ -3,4 +3,4 @@
printf -v ARGS "%q " "$@"
${CMAKE_C_COMPILER} -Xclang -load -Xclang ${PROJECT_BINARY_DIR}/src/libclang-hash.so -Xclang -plugin -Xclang hash-unit $ARGS
${CMAKE_C_COMPILER} -Xclang -load -Xclang ${PROJECT_BINARY_DIR}/src/libclang-hash.so -Xclang -add-plugin -Xclang hash-unit $ARGS -fsyntax-only
......@@ -35,13 +35,8 @@ if __name__ == "__main__":
objectfile = args[args.index("-o")+1]
objectfile_hash = None
if os.path.exists(objectfile):
# Strip filename symbol from binary
call(["strip", "-N", os.path.basename(objectfile).replace(".o", ".c"), objectfile])
with open(objectfile, "rb") as fd:
objectfile_data = fd.read()
objectfile_hash = hashlib.sha1()
objectfile_hash.update(objectfile_data)
objectfile_hash = objectfile_hash.hexdigest()
objectfile_hash = check_output(["hash-objectfile", objectfile]).strip()
objectfile_data = open(objectfile).read()
else:
objectfile_data = None
......
#!/usr/bin/python
import os
import sys
import subprocess
import re
import tempfile
import hashlib
def find_symbols(elf):
    """Return the symbol names that ``nm`` reports for the file *elf*.

    Only ``nm`` output lines with at least three whitespace-separated
    fields contribute a name; the third field is taken as the symbol
    name.  Shorter lines are skipped (presumably entries printed without
    an address column, such as undefined symbols -- confirm against the
    ``nm`` output format).

    Raises subprocess.CalledProcessError if ``nm`` exits with a non-zero
    status.
    """
    # universal_newlines=True makes check_output return text rather than
    # bytes on Python 3, so the str-based splitting below works on both
    # Python 2.7 and Python 3.
    nm_output = subprocess.check_output(["nm", elf], universal_newlines=True)
    fields_per_line = (line.split()
                       for line in nm_output.strip("\n").split("\n"))
    return [fields[2] for fields in fields_per_line if len(fields) >= 3]
def find_sections(elf):
    """Return section names parsed from ``readelf --sections --wide`` for *elf*.

    A line of the ``readelf`` output contributes a name only when exactly
    one of its whitespace-separated tokens starts with a "."; that token
    is taken as the section name.  All other lines are ignored.

    Raises subprocess.CalledProcessError if ``readelf`` exits with a
    non-zero status.
    """
    # universal_newlines=True makes check_output return text rather than
    # bytes on Python 3, so the str-based parsing below works on both
    # Python 2.7 and Python 3.
    readelf_output = subprocess.check_output(
        ["readelf", "--sections", "--wide", elf], universal_newlines=True)
    sections = []
    for line in readelf_output.strip("\n").split("\n"):
        dotted = [token for token in line.split() if token.startswith(".")]
        if len(dotted) == 1:
            sections.append(dotted[0])
    return sections
def normalize_names(names):
    """Build a rename list for names carrying an unstable numeric suffix.

    A name is affected when it contains a "." followed by four or more
    digits.  Every such digit run in the name is replaced by a
    zero-padded index: the number of renames collected before this name.
    Returns a list of ``(old_name, new_name)`` tuples in input order;
    names without such a run are left out.
    """
    unstable_suffix = re.compile("\\.([0-9]{4,})")
    renames = []
    for original in names:
        if unstable_suffix.search(original) is None:
            continue
        # The index is taken before appending, so the first rename gets 0000.
        stable = unstable_suffix.sub(".%04d" % len(renames), original)
        renames.append((original, stable))
    return renames
def hash_file(fn_in):
    """Return the SHA-1 hex digest of a normalized copy of object file *fn_in*.

    The file is passed through ``objcopy`` with three adjustments before
    hashing:

    * the symbol named after the presumed source file (basename of
      *fn_in* with its extension replaced by ".c") is stripped,
    * symbols selected by normalize_names() are redefined to their
      normalized names,
    * sections selected by normalize_names() are renamed likewise.

    *fn_in* itself is not modified; ``objcopy`` writes to a temporary
    file that is removed again before returning.

    Raises subprocess.CalledProcessError if ``nm``, ``readelf`` or
    ``objcopy`` exits with a non-zero status.
    """
    symbol_renames = normalize_names(find_symbols(fn_in))
    section_renames = normalize_names(find_sections(fn_in))

    # Swap only the extension.  A plain str.replace(".o", ".c") would also
    # rewrite ".o" occurrences inside the name ("my.old.o" -> "my.cld.c").
    stem, _ext = os.path.splitext(os.path.basename(fn_in))
    c_file = stem + ".c"

    extra_args = []
    for (old, new) in section_renames:
        extra_args += ["--rename-section", "%s=%s" % (old, new)]

    # Context managers ensure both temporary files are closed (and thereby
    # deleted) even when objcopy fails.  The rename list is opened in text
    # mode so str lines can be written on Python 2 and Python 3 alike.
    with tempfile.NamedTemporaryFile(mode="w") as rename_file, \
            tempfile.NamedTemporaryFile() as out_file:
        for pair in symbol_renames:
            rename_file.write("%s %s\n" % pair)
        # objcopy reads the list by file name, so it must be on disk first.
        rename_file.flush()

        subprocess.check_call(["objcopy", fn_in,
                               "--strip-symbol=%s" % c_file,
                               "--redefine-syms=%s" % rename_file.name] +
                              extra_args + [out_file.name])

        # Re-open by name so the bytes objcopy wrote are read from the start.
        with open(out_file.name, "rb") as fd:
            digest = hashlib.sha1(fd.read()).hexdigest()

    return digest
if __name__ == "__main__":
    # Script entry point: print one SHA-1 hex digest per line, one for each
    # object file given on the command line.
    if len(sys.argv) < 2:
        # sys.argv[0] is the program name; sys.argv[1] does not exist in
        # this branch and indexing it would raise IndexError instead of
        # showing the usage message.
        sys.exit("Take exactly one argument: %s <ELF>" % sys.argv[0])
    for fn_in in sys.argv[1:]:
        digest = hash_file(fn_in)
        # Parenthesized single-argument form is valid on Python 2 and 3.
        print(digest)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment