From 80121d324832c6a7507ed55c5f06ae5cd1e1da9a Mon Sep 17 00:00:00 2001
From: Phillip Raffeck <phillip.raffeck@fau.de>
Date: Fri, 8 Sep 2017 12:52:05 +0200
Subject: [PATCH] tacle-check: merge in code metrics analysis
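
Add a tb_metrics.py library module that compiles each benchmark to a
single LLVM IR file (clang, plus llvm-link for multi-file benchmarks)
and runs the 'metrics' tool on it to collect code metrics: presence of
inputs, McCabe complexity, total and nested loop counts, recursion,
floating-point usage (overall and in conditionals), call-stack depth,
and use of function pointers. Per-benchmark output is written to
metrics_output.txt and summarized in metrics.csv.

tacle-check.py gains three options: -M/--metrics enables the analysis,
-E/--entry selects the entry point (a '%s' placeholder is replaced by
the benchmark name, default '%s_main'), and --metrics-opt-level sets
the optimization level used for the IR build (default -O0). The
collected metrics reports are merged into the overall test results.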

---
 tools/tacle-check/CMakeLists.txt     |   1 +
 tools/tacle-check/bin/tacle-check.py |  26 ++-
 tools/tacle-check/lib/tb_metrics.py  | 253 +++++++++++++++++++++++++++
 3 files changed, 277 insertions(+), 3 deletions(-)
 create mode 100644 tools/tacle-check/lib/tb_metrics.py

diff --git a/tools/tacle-check/CMakeLists.txt b/tools/tacle-check/CMakeLists.txt
index 64c17673029..128bc25024a 100644
--- a/tools/tacle-check/CMakeLists.txt
+++ b/tools/tacle-check/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LibFiles
     tb_report.py
     tb_wcet.py
     tb_annotate.py
+    tb_metrics.py
     HTML.py
     )
 
diff --git a/tools/tacle-check/bin/tacle-check.py b/tools/tacle-check/bin/tacle-check.py
index 30c3f2a6b58..9ba94abe2e4 100755
--- a/tools/tacle-check/bin/tacle-check.py
+++ b/tools/tacle-check/bin/tacle-check.py
@@ -8,6 +8,8 @@ import os.path
 import os
 import fnmatch
 
+from collections import OrderedDict
+
 # setup the library paths
 this_dir = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(os.path.dirname(this_dir))
@@ -22,6 +24,7 @@ import tb_exec_utils as exec_utils
 import tb_report as report
 import tb_wcet as wcet
 import tb_annotate as annotate
+import tb_metrics as metrics
 
 
 LINTER = 'tacle-lint'
@@ -155,6 +158,14 @@ def parse_args():
 
+    parser.add_argument('-M', '--metrics', action='store_true', default=False,
+                        dest='metrics', help='analyze benchmarks with code metrics')
+
+    parser.add_argument('-E', '--entry', default="%s_main", dest='entry',
+                        help='specify entry point for the metrics analysis')
+
+    parser.add_argument('--metrics-opt-level', default="-O0", dest='metrics_opt_level',
+                        help='specify optimization level for the metrics IR build')
     return parser, parser.parse_args()
 
 
 ##
 # check if the input directory looks as expected 
@@ -279,7 +290,7 @@ def main():
     
     # hold all run tests
     # ['testname' : Reports]
-    the_tests = dict()
+    the_tests = OrderedDict()
     
     # 1.) COMPILERS
     for c in args.compilers: # forall compilers 
@@ -340,8 +351,6 @@ def main():
         wcet_result_file = os.path.join(args.out_dir, "wcet.csv")
         wcet.write_WCET_result_csv(wcet_result_file, WCET_reports)
 
-            
-        
         # 2.) preopt
         if not args.opt is None:
             WCET_reports_pre_opt = wcet.run_parallel(wcet.process_benchmark_pre_optimizations,
@@ -573,6 +582,17 @@ def run_sanitizing(out_dir, bench_files_dict, verbose=False):
         argv = ["clang", "-g", "-O2", "-fsanitize=address", "-o", binary]
         argv.extend(bench_files_dict[b])
 
+    # 6.) BENCHMARK METRICS
+    if args.metrics:
+        result_reports = metrics.calc_metrics(args.out_dir, bench_files_dict, args.metrics_opt_level, args.timeout, args.entry, args.verbose)
+
+        # create summary
+        metrics_result_file = os.path.join(args.out_dir, "metrics.csv")
+        metrics.summarize(result_reports, metrics_result_file)
+        the_tests.update(result_reports)
+
+        if args.verbose:
+            print("written %s" % metrics_result_file)
 
         return_code = exec_utils.run_cmd(argv=argv,
                                          out_file="/dev/null",
diff --git a/tools/tacle-check/lib/tb_metrics.py b/tools/tacle-check/lib/tb_metrics.py
new file mode 100644
index 00000000000..e0abad6c6e6
--- /dev/null
+++ b/tools/tacle-check/lib/tb_metrics.py
@@ -0,0 +1,253 @@
+import os
+import re
+
+import tb_exec_utils as exec_utils
+import tb_report as report
+
+METRICS_TOOL = 'metrics'
+INPUTS_NAME = 'find-inputs'
+MCC_NAME = 'calc-mccabe'
+LOOPS_NAME = 'loop-stats'
+RECURSION_NAME = 'is-recursive'
+FPU_NAME = 'uses-fpu'
+CALLS_NAME = 'call-stack'
+FPTR_NAME = 'uses-fptr'
+
+def combine_to_one_IR_file(bench_files, result_dir, ll_file, opt, timeout, verbose):
+    tmp_files = []
+    for bench_file in bench_files:
+        filename, extension = os.path.splitext(os.path.basename(bench_file))
+        tmp_file = os.path.join(result_dir, filename + "_tmp.ll")
+        argv = ['clang']
+        argv.extend(['-o', tmp_file, '-emit-llvm', '-S'])
+        argv.extend(opt.split())
+        argv.append(bench_file)
+        tmp_files.append(tmp_file)
+        ret = timed_exec(argv, verbose, timeout)
+        _, _, _, exitstatus = ret
+        if exitstatus != 0:
+            return ret
+
+    argv = ['llvm-link']
+    argv.extend(['-S', '-o', ll_file])
+    argv.extend(tmp_files)
+    return timed_exec(argv, verbose, timeout)
+
+def report_build_error(bench_name, result_dir, stdout, stderr):
+    output = os.path.join(result_dir, "compile_output.txt")
+    f = open(output, "a")
+    f.write('unable to compile:\n')
+    f.write(stdout)
+    f.write(stderr)
+    f.close()
+    return report.Report(name=bench_name,
+                         file=output,
+                         dir=result_dir,
+                         date=report.datestring(),
+                         ok=False,
+                         fail_str="build error")
+
+# TODO move to exec_utils
+def timed_exec(argv, verbose, timeout):
+    cmd = exec_utils.TimedCommand(argv, verbose=verbose)
+    return cmd.run(timeout=timeout)
+
+
+def parse_inputs(results, stdout):
+    # check whether the benchmark has any input
+    if re.search(r"^Function.*has no input\.$", stdout, re.MULTILINE) is not None:
+        ok = 0
+        msg = "no input"
+    else:
+        ok = 1
+        msg = "has input"
+
+    results.append((INPUTS_NAME, ok, msg))
+
+def parse_mcc(results, stdout):
+    # extract complexity
+    msg = re.search("^McCabe.*: (\d+)$", stdout, re.MULTILINE).group(1)
+
+    results.append((MCC_NAME, 1, msg))
+
+def parse_loops(results, stdout):
+    # extract results
+    msg = re.search("^Total Loops: (\d+)", stdout, re.MULTILINE).group(1)
+    nested_msg = re.search("^Nested Loops: (\d+)", stdout, re.MULTILINE).group(1)
+
+    results.append((LOOPS_NAME + " total", 1, msg))
+    results.append((LOOPS_NAME + " nested", 1, nested_msg))
+
+def parse_fptr(results, stdout):
+    # extract result
+    res = re.search("^Function.* calls function via pointers: (\d+)", stdout, re.MULTILINE)
+    if res.group(1) == '1':
+        msg = "uses function ptrs"
+    else:
+        msg = "uses no function ptrs"
+
+    results.append((FPTR_NAME, 1, msg))
+
+def parse_calls(results, stdout):
+    # extract result
+    if re.search("^Unbound.*", stdout, re.MULTILINE):
+        msg = "unbound"
+    else:
+        msg = re.search("^Longest call chain: (\d+)", stdout, re.MULTILINE).group(1)
+
+    results.append((CALLS_NAME, 1, msg))
+
+def parse_recursion(results, stdout):
+    # extract result
+    msg = "uses no recursion" if re.search("uses no recursion", stdout) else "uses recursion"
+
+    results.append((RECURSION_NAME, 1, msg))
+
+def parse_fpu(results, stdout):
+    # extract result
+    res = re.search(r"^Function.*uses floating point values: (\d+)", stdout, re.MULTILINE)
+    if res.group(1) == '1':
+        msg = "uses floats"
+    else:
+        msg = "uses no floats"
+    cond_msg = re.search(r"^Conditionals using float.*: (\d+)", stdout, re.MULTILINE).group(1)
+
+    results.append((FPU_NAME, 1, msg))
+    results.append((FPU_NAME + " in conds", 1, cond_msg))
+
+def parse_results(tools, reports, out_file, benchmark_name, result_dir, ret):
+    # TODO split to blocks
+    stdout, stderr, timeout_occurred, returncode = ret
+
+    if returncode != 0:
+        with open(out_file, "w") as f:
+            f.write(stderr)
+        if re.search("Entry point.*not found", stderr) is not None:
+            msg = "format error"
+        else:
+            msg = "unknown error"
+
+        rep = report.Report(name=benchmark_name,
+                            file=out_file,
+                            dir=result_dir,
+                            date=report.datestring(),
+                            ok=0,
+                            succ_str=msg,
+                            fail_str=msg)
+        for tool in tools:
+            reports[tool][benchmark_name] = rep
+
+    else:
+        with open(out_file, "w") as f:
+            f.write(stdout)
+
+        results = []
+        parse_inputs(results, stdout)
+        parse_loops(results, stdout)
+        parse_mcc(results, stdout)
+        parse_calls(results, stdout)
+        parse_recursion(results, stdout)
+        parse_fpu(results, stdout)
+        parse_fptr(results, stdout)
+
+        for tool, success, msg in results:
+            rep = report.Report(name=benchmark_name,
+                                file=out_file,
+                                dir=result_dir,
+                                date=report.datestring(),
+                                ok=success,
+                                succ_str=msg,
+                                fail_str=msg)
+            reports[tool][benchmark_name] = rep
+
+def build_ll_file(ll_file, result_dir, bench_files, opt, timeout, verbose=False):
+    # if the benchmark consists of more than one file,
+    # combine the sources into one IR file using llvm-link
+    if len(bench_files) > 1:
+        return combine_to_one_IR_file(bench_files, result_dir, ll_file, opt, timeout, verbose)
+    else:
+        argv = ['clang']
+        argv.extend(['-o', ll_file, '-emit-llvm', '-S'])
+        argv.extend(opt.split())
+        argv.extend(bench_files)
+
+        return timed_exec(argv, verbose, timeout)
+
+def exec_metric(ll_file, benchmark_name, entry_point, timeout, verbose=False):
+    argv = [METRICS_TOOL]
+    argv.append("-all")
+    if "%s" in entry_point:
+        argv.append("-entry=" + entry_point % benchmark_name)
+    else:
+        argv.append("-entry=" + entry_point)
+    argv.append(ll_file)
+
+    return timed_exec(argv, verbose, timeout)
+
+def calc_metrics(out_dir, bench_files_dict, opt, timeout, entry_point, verbose=False):
+    reports = dict()
+    tools = INPUTS_NAME, MCC_NAME, LOOPS_NAME + " total", LOOPS_NAME + " nested", RECURSION_NAME, FPU_NAME, FPU_NAME + " in conds", CALLS_NAME, FPTR_NAME
+    for tool in tools:
+        reports[tool] = dict()
+
+    for b, files in bench_files_dict.items():
+        benchmark_name = os.path.basename(b)
+
+        # make output directory
+        result_dir = os.path.join(out_dir, "metrics", benchmark_name)
+        ll_file = os.path.join(result_dir, benchmark_name + '.ll')
+
+        # create the results dir
+        exec_utils.mkdir_p(result_dir)
+
+        # compile benchmark to IR
+        ret = build_ll_file(ll_file, result_dir, files, opt, timeout, verbose)
+        out, err, _, exitstatus = ret
+        if exitstatus != 0:
+            rep = report_build_error(benchmark_name, result_dir, out, err)
+            for tool in tools:
+                reports[tool][benchmark_name] = rep
+            continue
+
+        # calc all metrics
+        ret = exec_metric(ll_file, benchmark_name, entry_point, timeout, verbose)
+
+        # parse metrics results
+        out_file = os.path.join(result_dir, "metrics_output.txt")
+        parse_results(tools, reports, out_file, benchmark_name, result_dir, ret)
+
+    return reports
+
+def summarize(reports, output):
+    f = open(output, 'w')
+    f.write('position benchmark inputs mcc recursive call-stack fptr fpu fpu-in-cond loops nested-loops\n')
+
+    def summarize_result(report, expected=None):
+        msg = report.get_success_str()
+        if msg == "format error" or msg == "build error":
+            return "error"
+        if expected is None:
+            return msg
+        if msg == expected:
+            return "yes"
+        return "no"
+
+    benchmark_names = list(reports[INPUTS_NAME].keys())
+    benchmark_names.sort()
+    position = 1
+    for b in benchmark_names:
+        name = b.replace('_', r'\_')
+        inputs = summarize_result(reports[INPUTS_NAME][b], "has input")
+        mcc = summarize_result(reports[MCC_NAME][b])
+        recursive = summarize_result(reports[RECURSION_NAME][b], "uses recursion")
+        callstack = summarize_result(reports[CALLS_NAME][b])
+        fptr = summarize_result(reports[FPTR_NAME][b], "uses function ptrs")
+        fpu = summarize_result(reports[FPU_NAME][b], "uses floats")
+        fpu_in_cond = summarize_result(reports[FPU_NAME + " in conds"][b])
+        loops = summarize_result(reports[LOOPS_NAME + " total"][b])
+        nested = summarize_result(reports[LOOPS_NAME + " nested"][b])
+
+        f.write(" ".join([str(position), name, inputs, mcc, recursive, callstack, fptr, fpu, fpu_in_cond, loops, nested]))
+        f.write('\n')
+        position += 1
+    f.close()
-- 
GitLab