From 80121d324832c6a7507ed55c5f06ae5cd1e1da9a Mon Sep 17 00:00:00 2001
From: Phillip Raffeck <phillip.raffeck@fau.de>
Date: Fri, 8 Sep 2017 12:52:05 +0200
Subject: [PATCH] tacle-check: merge in code metrics analysis

---
 tools/tacle-check/CMakeLists.txt     |   1 +
 tools/tacle-check/bin/tacle-check.py |  26 ++-
 tools/tacle-check/lib/tb_metrics.py  | 253 +++++++++++++++++++++++++++
 3 files changed, 277 insertions(+), 3 deletions(-)
 create mode 100644 tools/tacle-check/lib/tb_metrics.py

diff --git a/tools/tacle-check/CMakeLists.txt b/tools/tacle-check/CMakeLists.txt
index 64c17673029..128bc25024a 100644
--- a/tools/tacle-check/CMakeLists.txt
+++ b/tools/tacle-check/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LibFiles
   tb_report.py
   tb_wcet.py
   tb_annotate.py
+  tb_metrics.py
   HTML.py
 )
 
diff --git a/tools/tacle-check/bin/tacle-check.py b/tools/tacle-check/bin/tacle-check.py
index 30c3f2a6b58..9ba94abe2e4 100755
--- a/tools/tacle-check/bin/tacle-check.py
+++ b/tools/tacle-check/bin/tacle-check.py
@@ -8,6 +8,8 @@ import os.path
 import os
 import fnmatch
 
+from collections import OrderedDict
+
 # setup the library paths
 this_dir = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(os.path.dirname(this_dir))
@@ -22,6 +24,7 @@ import tb_exec_utils as exec_utils
 import tb_report as report
 import tb_wcet as wcet
 import tb_annotate as annotate
+import tb_metrics as metrics
 
 LINTER = 'tacle-lint'
 
@@ -155,6 +158,14 @@ def parse_args():
 
     return parser, parser.parse_args()
 
+    parser.add_argument('-M', '--metrics', action='store_true', default=False,
+                        dest='metrics', help='analyze benchmarks with certain metrics')
+
+    parser.add_argument('-E', '--entry', default="%s_main", dest='entry',
+                        help='specify entry point for analysis')
+
+    parser.add_argument('-metrics-opt-level', '--metrics-opt-level', default="-O0", dest='metrics_opt_level',
+                        help='specify optimization level')
 
 ##
 # check if the input directory looks as expected
@@ -279,7 +290,7 @@ def main():
 
     # hold all run tests
     # ['testname' : Reports]
-    the_tests = dict()
+    the_tests = OrderedDict()
 
     # 1.) COMPILERS
     for c in args.compilers:  # forall compilers
@@ -340,8 +351,6 @@
             wcet_result_file = os.path.join(args.out_dir, "wcet.csv")
             wcet.write_WCET_result_csv(wcet_result_file, WCET_reports)
 
-
-
         # 2.) preopt
         if not args.opt is None:
             WCET_reports_pre_opt = wcet.run_parallel(wcet.process_benchmark_pre_optimizations,
@@ -573,6 +582,17 @@ def run_sanitizing(out_dir, bench_files_dict, verbose=False):
         argv = ["clang", "-g", "-O2", "-fsanitize=address", "-o", binary]
         argv.extend(bench_files_dict[b])
 
+    # 6.) BENCHMARK METRICS
+    if args.metrics:
+        result_reports = metrics.calc_metrics(args.out_dir, bench_files_dict, args.metrics_opt_level, args.timeout, args.entry, args.verbose)
+
+        # create summary
+        metrics_result_file = os.path.join(args.out_dir, "metrics.csv")
+        metrics.summarize(result_reports, metrics_result_file)
+        the_tests.update(result_reports)
+
+        if args.verbose:
+            print("written %s" % metrics_result_file)
 
         return_code = exec_utils.run_cmd(argv=argv,
                                          out_file="/dev/null",
diff --git a/tools/tacle-check/lib/tb_metrics.py b/tools/tacle-check/lib/tb_metrics.py
new file mode 100644
index 00000000000..e0abad6c6e6
--- /dev/null
+++ b/tools/tacle-check/lib/tb_metrics.py
@@ -0,0 +1,253 @@
+import os
+import re
+
+import tb_exec_utils as exec_utils
+import tb_report as report
+
+METRICS_TOOL = 'metrics'
+INPUTS_NAME = 'find-inputs'
+MCC_NAME = 'calc-mccabe'
+LOOPS_NAME = 'loop-stats'
+RECURSION_NAME = 'is-recursive'
+FPU_NAME = 'uses-fpu'
+CALLS_NAME = 'call-stack'
+FPTR_NAME = 'uses-fptr'
+
+def combine_to_one_IR_file(bench_files, result_dir, ll_file, opt, timeout, verbose):
+    tmp_files = []
+    for bench_file in bench_files:
+        filename, extension = os.path.splitext(os.path.basename(bench_file))
+        tmp_file = os.path.join(result_dir, filename + "_tmp.ll")
+        argv = ['clang']
+        argv.extend(['-o', tmp_file, '-emit-llvm', '-S'])
+        argv.extend(opt.split())
+        argv.append(bench_file)
+        tmp_files.append(tmp_file)
+        ret = timed_exec(argv, verbose, timeout)
+        _, _, _, exitstatus = ret
+        if exitstatus != 0:
+            return ret
+
+    argv = ['llvm-link']
+    argv.extend(['-S', '-o', ll_file])
+    argv.extend(tmp_files)
+    return timed_exec(argv, verbose, timeout)
+
+def report_build_error(bench_name, result_dir, stdout, stderr):
+    output = os.path.join(result_dir, "compile_output.txt")
+    f = open(output, "a");
+    f.write('unable to compile:\n')
+    f.write(stdout)
+    f.write(stderr)
+    f.close()
+    return report.Report(name=bench_name,
+                         file=output,
+                         dir=result_dir,
+                         date=report.datestring(),
+                         ok=False,
+                         fail_str="build error")
+
+# TODO move to exec_utils
+def timed_exec(argv, verbose, timeout):
+    cmd = exec_utils.TimedCommand(argv, verbose=verbose)
+    return cmd.run(timeout=timeout)
+
+
+def parse_inputs(results, stdout):
+    # extract complexity
+    if re.search("^Function.*has no input.$",stdout, re.MULTILINE) is not None:
+        ok = 0
+        msg = "no input"
+    else:
+        ok = 1
+        msg = "has input"
+
+    results.append((INPUTS_NAME, ok, msg))
+
+def parse_mcc(results, stdout):
+    # extract complexity
+    msg = re.search("^McCabe.*: (\d+)$", stdout, re.MULTILINE).group(1)
+
+    results.append((MCC_NAME, 1, msg))
+
+def parse_loops(results, stdout):
+    # extract results
+    msg = re.search("^Total Loops: (\d+)", stdout, re.MULTILINE).group(1)
+    nested_msg = re.search("^Nested Loops: (\d+)", stdout, re.MULTILINE).group(1)
+
+    results.append((LOOPS_NAME + " total", 1, msg))
+    results.append((LOOPS_NAME + " nested", 1, nested_msg))
+
+def parse_fptr(results, stdout):
+    # extract result
+    res = re.search("^Function.* calls function via pointers: (\d+)", stdout, re.MULTILINE)
+    if res.group(1) == '1':
+        msg = "uses function ptrs"
+    else:
+        msg = "uses no function ptrs"
+
+    results.append((FPTR_NAME, 1, msg))
+
+def parse_calls(results, stdout):
+    # extract result
+    if re.search("^Unbound.*", stdout, re.MULTILINE):
+        msg = "unbound"
+    else:
+        msg = re.search("^Longest call chain: (\d+)", stdout, re.MULTILINE).group(1)
+
+    results.append((CALLS_NAME, 1, msg))
+
+def parse_recursion(results, stdout):
+    # extract result
+    msg = "uses no recursion" if re.search("uses no recursion", stdout) else "uses recursion"
re.search("uses no recursion", stdout) else "uses recursion" + + results.append((RECURSION_NAME, 1, msg)) + +def parse_fpu(results, stdout): + # extract result + res = re.search("^Function.*uses floating point values: (\d+)", stdout, re.MULTILINE) + if res.group(1) == '1': + msg = "uses floats" + else: + msg = "uses no floats" + cond_msg = re.search("^Conditionals using float.*: (\d+)", stdout, re.MULTILINE).group(1) + + results.append((FPU_NAME, 1, msg)) + results.append((FPU_NAME + " in conds", 1, cond_msg)) + +def parse_results(tools, reports, out_file, benchmark_name, result_dir, ret): + # TODO split to blocks + stdout, stderr, timeout_occured, returncode = ret + + if returncode != 0: + with open(out_file, "w") as f: + f.write(stderr) + if re.search("Entry point.*not found", stderr) is not None: + msg = "format error" + else: + msg = "unknown error" + + rep = report.Report(name=benchmark_name, + file=out_file, + dir=result_dir, + date=report.datestring(), + ok=0, + succ_str=msg, + fail_str=msg) + for tool in tools: + reports[tool][benchmark_name] = rep + + else: + with open(out_file, "w") as f: + f.write(stdout) + + results = [] + parse_inputs(results, stdout) + parse_loops(results, stdout) + parse_mcc(results, stdout) + parse_calls(results, stdout) + parse_recursion(results, stdout) + parse_fpu(results, stdout) + parse_fptr(results, stdout) + + for tool, success, msg in results: + rep = report.Report(name=benchmark_name, + file=out_file, + dir=result_dir, + date=report.datestring(), + ok=success, + succ_str=msg, + fail_str=msg) + reports[tool][benchmark_name] = rep + +def build_ll_file(ll_file, result_dir, bench_files, opt, timeout, verbose=False): + # if benchmark consists of more than one file + # create one IR file using llvm-link + if len(bench_files) > 1: + return combine_to_one_IR_file(bench_files, result_dir, ll_file, opt, timeout, verbose) + else: + argv = ['clang'] + argv.extend(['-o', ll_file, '-emit-llvm', '-S']) + argv.extend(opt.split()) + argv.extend(bench_files) + + return timed_exec(argv, verbose, timeout) + +def exec_metric(ll_file, benchmark_name, entry_point, timeout, verbose=False): + argv = [METRICS_TOOL] + argv.append("-all") + if (re.search("%s", entry_point)): + argv.append("-entry=" + entry_point % benchmark_name) + else: + argv.append("-entry=" + entry_point) + argv.append(ll_file) + + return timed_exec(argv, verbose, timeout) + +def calc_metrics(out_dir, bench_files_dict, opt, timeout, entry_point, verbose=False): + reports = dict() + tools = INPUTS_NAME, MCC_NAME, LOOPS_NAME + " total", LOOPS_NAME + " nested", RECURSION_NAME, FPU_NAME, FPU_NAME + " in conds", CALLS_NAME, FPTR_NAME + for tool in tools: + reports[tool] = dict() + + for b, files in bench_files_dict.iteritems(): + benchmark_name = os.path.basename(b) + + # make output directory + result_dir = os.path.join(out_dir, "metrics", benchmark_name) + ll_file = os.path.join(result_dir, benchmark_name + '.ll') + + # create the results dir + exec_utils.mkdir_p(result_dir) + + # compile benchmark to IR + ret = build_ll_file(ll_file, result_dir, files, opt, timeout, verbose) + out, err, _, exitstatus = ret + if exitstatus != 0: + rep = report_build_error(benchmark_name, result_dir, out, err) + for tool in tools: + reports[tool][benchmark_name] = rep + continue + + # calc all metrics + ret = exec_metric(ll_file, benchmark_name, entry_point, timeout, verbose) + + # parse metrics results + out_file = os.path.join(result_dir, "metrics_output.txt") + parse_results(tools, reports, out_file, benchmark_name, 
+
+    return reports
+
+def summarize(reports, output):
+    f = open(output, 'w')
+    f.write('position benchmark inputs mcc recursive call-stack fptr fpu fpu-in-cond loops nested-loops\n')
+
+    def summarize_result(report, expected=None):
+        msg = report.get_success_str()
+        if msg == "format error" or msg == "build error":
+            return "error"
+        if expected is None:
+            return msg
+        if msg == expected:
+            return "yes"
+        return "no"
+
+    benchmark_names = reports[INPUTS_NAME].keys()
+    benchmark_names.sort()
+    position = 1
+    for b in benchmark_names:
+        name = b.replace('_', '\_')
+        inputs = summarize_result(reports[INPUTS_NAME][b], "has input")
+        mcc = summarize_result(reports[MCC_NAME][b])
+        recursive = summarize_result(reports[RECURSION_NAME][b], "uses recursion")
+        callstack = summarize_result(reports[CALLS_NAME][b])
+        fptr = summarize_result(reports[FPTR_NAME][b], "uses function ptrs")
+        fpu = summarize_result(reports[FPU_NAME][b], "uses floats")
+        fpu_in_cond = summarize_result(reports[FPU_NAME + " in conds"][b])
+        loops = summarize_result(reports[LOOPS_NAME + " total"][b])
+        nested = summarize_result(reports[LOOPS_NAME + " nested"][b])
+
+        f.write(" ".join([str(position), name, inputs, mcc, recursive, callstack, fptr, fpu, fpu_in_cond, loops, nested]))
+        f.write('\n')
+        position += 1
+    f.close()
-- 
GitLab
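
Usage sketch, not part of the patch: a minimal example of driving the new tb_metrics module directly, using only the signatures introduced above. The benchmark path, output directory and 60-second timeout are hypothetical; bench_files_dict maps a benchmark directory to its list of C sources, the opt string is handed to clang verbatim, and an entry pattern containing "%s" is expanded with the benchmark name (mirroring the new -E/--entry default "%s_main").

    import tb_metrics as metrics

    # hypothetical benchmark layout: one benchmark directory with one C file
    bench_files_dict = {"bench/sequential/example": ["bench/sequential/example/example.c"]}

    # compile each benchmark to LLVM IR, run the 'metrics' tool on it,
    # and collect one Report per metric and benchmark
    reports = metrics.calc_metrics("out", bench_files_dict, "-O0", 60, "%s_main", verbose=True)

    # write the space-separated summary table, one row per benchmark
    metrics.summarize(reports, "out/metrics.csv")

Within tacle-check.py the same path is switched on with the new -M/--metrics option; raw tool output for each benchmark ends up in <out_dir>/metrics/<benchmark>/metrics_output.txt and the summary in <out_dir>/metrics.csv.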