Skip to content
Snippets Groups Projects
Commit aad5c41e authored by Andreas Ziegler's avatar Andreas Ziegler
Browse files

librarystore: propagate users across PLT calls

So far, we only propagated users through direct calls or
calls to other exported functions in the same library via
the PLT. The code in interface_calls.py also checked for
PLT calls to imported functions but only printed a debug
message.

This change also extracts these calls on a per-function
basis and creates a mapping of functions to their calls to
imported functions. This mapping can be used to generate a
more fine-grained dependency analysis when propagating
users of exports through the dependency tree. For example,
we could add a user to an export only if the callee itself
has users (instead of always adding it while resolving the
location of imported symbols). As this requires a bit more
thought and work, this will be done in a later change.

For now, we propagate the users in addition to marking them
as used in LibraryStore.resolve_functions(). Propagation
now needs a worklist algorithm, because propagating across
libraries makes it necessary to re-evaluate a library even
if that library has been processed before.
parent 90f5b9de
Branches
No related tags found
No related merge requests found
......@@ -68,6 +68,7 @@ def disassemble_objdump(library, start, length):
def find_calls_from_objdump(library, disas, symbols):
local_calls = set()
import_calls = set()
for _, decoded in disas:
match = CALL_REGEX.match(decoded)
......@@ -85,10 +86,9 @@ def find_calls_from_objdump(library, disas, symbols):
elif target in library.exports_plt:
local_calls.add(library.exports_plt[target])
elif target in library.imports_plt:
# TODO: see comment in capstone below
pass
import_calls.add(library.imports_plt[target])
return local_calls
return (local_calls, import_calls)
def disassemble_capstone(library, start, length):
disassembly = []
......@@ -118,6 +118,7 @@ def disassemble_capstone(library, start, length):
def find_calls_from_capstone(library, disas, symbols):
local_calls = set()
import_calls = set()
for instr in disas:
if instr.group(capstone.x86_const.X86_GRP_CALL) \
or instr.group(capstone.x86_const.X86_GRP_JUMP):
......@@ -130,17 +131,15 @@ def find_calls_from_capstone(library, disas, symbols):
elif target in library.exports_plt:
local_calls.add(library.exports_plt[target])
elif target in library.imports_plt:
# TODO: Here we could generate call graph data for used imports
# from other libraries, i.e. more fine grained usage data.
logging.debug('plt_import_call at %x to %s', instr.address,
library.imports_plt[target])
import_calls.add(library.imports_plt[target])
return local_calls
return (local_calls, import_calls)
def resolve_calls_in_library(library, disas_function=disassemble_capstone):
logging.debug('Processing %s', library.fullname)
before = time.time()
calls = defaultdict(set)
internal_calls = defaultdict(set)
external_calls = defaultdict(set)
ranges = library.get_function_ranges()
symbols = {}
......@@ -155,25 +154,27 @@ def resolve_calls_in_library(library, disas_function=disassemble_capstone):
for name, cur_range in ranges.items():
for start, size in cur_range:
disas, resolution_function = disas_function(library, start, size)
local_calls = resolution_function(library, disas, symbols)
local_calls, import_calls = resolution_function(library, disas, symbols)
if local_calls:
calls[name] = local_calls
internal_calls[name] = local_calls
if import_calls:
external_calls[name] = import_calls
after = time.time()
duration = after - before
logging.info('Thread %d: %s took %.3f s', os.getpid(),
library.fullname,
duration)
return (calls, (after - before))
return (internal_calls, external_calls, (after - before))
def map_wrapper(path):
try:
lib = Library(path, parse=True)
except (OSError, ELFError) as err:
logging.error('%s: %s', path, err)
return (None, None, 0)
calls, duration = resolve_calls_in_library(lib)
return (lib.fullname, calls, duration)
return (None, None, None, 0)
internal_calls, external_calls, duration = resolve_calls_in_library(lib)
return (lib.fullname, internal_calls, external_calls, duration)
def resolve_calls(store, n_procs=int(multiprocessing.cpu_count() * 1.5)):
libs = [lib.fullname for lib in sorted(store.get_library_objects(),
......@@ -183,11 +184,12 @@ def resolve_calls(store, n_procs=int(multiprocessing.cpu_count() * 1.5)):
result = pool.map(map_wrapper, libs, chunksize=1)
pool.close()
for fullname, calls, _ in result:
store[fullname].calls = calls
for fullname, internal_calls, external_calls, _ in result:
store[fullname].internal_calls = internal_calls
store[fullname].external_calls = external_calls
logging.info('... done!')
longest = [(v[0], v[2]) for v in sorted(result, key=lambda x: -x[2])]
longest = [(v[0], v[3]) for v in sorted(result, key=lambda x: -x[3])]
logging.info(longest[:20])
logging.info('total number of calls: %d', sum(len(v[1].values()) for v in result))
logging.info('total number of calls: %d', sum(len(v[2].values()) + len(v[1].values()) for v in result))
return result
......@@ -52,7 +52,9 @@ class Library:
self.runpaths = []
self.soname = None
self.calls = {}
self.ranges = None
self.external_calls = {}
self.internal_calls = {}
if parse:
self.parse_functions()
......@@ -183,6 +185,7 @@ class Library:
self.parse_dynamic()
self.parse_plt()
self.gather_hookable_addresses_from_symtab()
self.get_function_ranges()
if release:
self._release_elffile()
......@@ -217,10 +220,13 @@ class Library:
return False
def get_function_ranges(self):
ranges = collections.defaultdict(list)
if self.ranges is not None:
return self.ranges
self.ranges = collections.defaultdict(list)
section = self._elffile.get_section_by_name('.dynsym')
if not section:
return ranges
return self.ranges
for name in self.exports:
# Could be more than one => symbol versioning. One probably has the
......@@ -231,9 +237,9 @@ class Library:
for sym in syms:
start = self._get_symbol_offset(sym)
size = sym.entry['st_size']
ranges[name].append((start, size))
self.ranges[name].append((start, size))
return ranges
return self.ranges
def gather_hookable_addresses_from_symtab(self):
if self.elfheader['e_type'] == 'ET_EXEC':
......
......@@ -222,14 +222,10 @@ class LibraryStore(BaseStore):
# No cache hit, calculate it
local_cache = set()
# If there are no calls, return the empty set
if function not in library.calls:
cache[libname][function] = set()
return set()
if function in library.internal_calls:
working_on.add(function)
for callee in library.calls[function]:
local_cache.add(callee)
for callee in library.internal_calls[function]:
local_cache.add((callee, library))
if callee in working_on:
continue
subcalls = self.get_transitive_calls(library, callee, cache,
......@@ -237,6 +233,19 @@ class LibraryStore(BaseStore):
local_cache.update(subcalls)
working_on.remove(function)
if function in library.external_calls:
for callee in library.external_calls[function]:
if callee in library.imports:
target_lib = self.get_from_path(library.imports[callee])
else:
logging.debug('external_calls: no target for \'%s\'', callee)
continue
if target_lib is None:
logging.warning('%s: call to unknown target for function %s',
libname, callee)
continue
local_cache.add((callee, target_lib))
cache[libname][function] = local_cache
return cache[libname][function]
......@@ -300,9 +309,12 @@ class LibraryStore(BaseStore):
logging.info('Propagating export users through calls...')
libobjs = self.get_entry_points(all_entries)
lib_worklist = set(libobjs)
# Propagate usage information inside libraries
for lib in libobjs:
logging.debug('Propagating in %s', lib.fullname)
while lib_worklist:
lib = lib_worklist.pop()
logging.debug('Propagating in %s, worklist length: %d', lib.fullname,
len(lib_worklist))
# Starting points are all referenced exports
worklist = collections.deque(function for function, users
in lib.exports.items() if users)
......@@ -311,15 +323,18 @@ class LibraryStore(BaseStore):
cur = worklist.popleft()
users = lib.exports[cur]
# Add users to transitively called functions
for trans_callee in self.get_transitive_calls(lib, cur):
# Draw internal reference
lib.add_export_user(trans_callee, lib.fullname)
for (trans_callee, called_lib) in self.get_transitive_calls(lib, cur):
# Draw direct reference
called_lib.add_export_user(trans_callee, lib.fullname)
if lib != called_lib:
lib_worklist.add(called_lib)
for user in users:
# Add user to callee if not already present
if not lib.add_export_user(trans_callee, user):
if not called_lib.add_export_user(trans_callee, user):
continue
# Only add to worklist if not queued already
if trans_callee not in worklist:
# Only add to worklist if the callee is in the current
# library and it is not queued already
if called_lib == lib and trans_callee not in worklist:
worklist.append(trans_callee)
logging.info('... done!')
......@@ -357,10 +372,14 @@ class LibraryStore(BaseStore):
lib_dict["all_imported_libs"].append([lib, path])
lib_dict["rpaths"] = value.rpaths
# We can't dump sets, so convert to a list
calls_dict = {}
for caller, calls in value.calls.items():
calls_dict[caller] = list(calls)
lib_dict["calls"] = calls_dict
internal_calls_dict = {}
for caller, calls in value.internal_calls.items():
internal_calls_dict[caller] = list(calls)
lib_dict["internal_calls"] = internal_calls_dict
external_calls_dict = {}
for caller, calls in value.external_calls.items():
external_calls_dict[caller] = list(calls)
lib_dict["external_calls"] = external_calls_dict
output[key] = lib_dict
......@@ -404,8 +423,10 @@ class LibraryStore(BaseStore):
all_imported_libs_dict[lib] = path
library.all_imported_libs = all_imported_libs_dict
library.rpaths = value["rpaths"]
for caller, calls in value["calls"].items():
library.calls[caller] = set(calls)
for caller, calls in value["internal_calls"].items():
library.internal_calls[caller] = set(calls)
for caller, calls in value["external_calls"].items():
library.external_calls[caller] = set(calls)
#print('{}: {}'.format(key, sorted(value["calls"].items())))
self._add_library(key, library)
......
......@@ -213,7 +213,7 @@ class TestLibrary(unittest.TestCase):
store, lib = create_store_and_lib()
lib.parse_functions()
calls, _ = resolve_calls_in_library(lib, disassemble_capstone)
calls, _, _ = resolve_calls_in_library(lib, disassemble_capstone)
self.assertEqual(len(calls), 4)
self.assertDictEqual(calls, self.call_result)
......@@ -222,7 +222,7 @@ class TestLibrary(unittest.TestCase):
store, lib = create_store_and_lib()
lib.parse_functions()
calls, _ = resolve_calls_in_library(lib, disassemble_objdump)
calls, _, _ = resolve_calls_in_library(lib, disassemble_objdump)
self.assertEqual(len(calls), 4)
self.assertDictEqual(calls, self.call_result)
......@@ -231,7 +231,7 @@ class TestLibrary(unittest.TestCase):
store, lib = create_store_and_lib(TEST_LIB_PLT)
lib.parse_functions()
calls, _ = resolve_calls_in_library(lib, disassemble_capstone)
calls, _, _ = resolve_calls_in_library(lib, disassemble_capstone)
# The results should match the variant with symbolic functions
self.assertEquals(len(calls), 4)
......@@ -241,7 +241,7 @@ class TestLibrary(unittest.TestCase):
store, lib = create_store_and_lib(TEST_LIB_PLT)
lib.parse_functions()
calls, _ = resolve_calls_in_library(lib, disassemble_objdump)
calls, _, _ = resolve_calls_in_library(lib, disassemble_objdump)
# The results should match the variant with symbolic functions
self.assertEquals(len(calls), 4)
......@@ -253,7 +253,8 @@ class TestLibrary(unittest.TestCase):
result = resolve_calls(store)
# calls for mock.so, libc-2.23.so and ld-2.23.so
self.assertEqual(len(result), 3)
self.assertDictEqual(dict(store[lib.fullname].calls), self.call_result)
self.assertDictEqual(dict(store[lib.fullname].internal_calls),
self.call_result)
def test_5_transitive_calls(self):
store, lib = create_store_and_lib(resolve_libs_recursive=True,
......@@ -261,13 +262,22 @@ class TestLibrary(unittest.TestCase):
result = store.get_transitive_calls(lib, 'second_level_caller')
# Check that transitive callees are returned
self.assertSetEqual(result, set(['external_caller', 'external']))
self.assertSetEqual(result, set([('external_caller', lib),
('external', lib)]))
# Check that functions calling themselves recursively work and cover
# the use of the cache (external is called from recursive and its
# recursive_helper function)
result = store.get_transitive_calls(lib, 'recursive')
self.assertSetEqual(result, set(['external', 'recursive_helper', 'recursive']))
self.assertSetEqual(result, set([('external', lib),
('recursive_helper', lib),
('recursive', lib)]))
# Check transitive calls into other libraries
store.resolve_functions(lib)
result = store.get_transitive_calls(lib, 'ref_internal')
self.assertIn(('malloc@@GLIBC_2.2.5',
store.get_from_path(lib.needed_libs['libc.so.6'])), result)
def test_6_propagate_calls_all_entries(self):
store, binary = create_store_and_lib(TEST_BINARY,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment