From eb6cfc3fd40cd6d309312b11f9621020bbcb2dd1 Mon Sep 17 00:00:00 2001
From: Florian Schmaus <flow@cs.fau.de>
Date: Tue, 11 Jun 2024 13:05:56 +0200
Subject: [PATCH] Add apps/MatMul.cpp

---
 apps/MatMul.cpp   | 294 ++++++++++++++++++++++++++++++++++++++++++++++
 apps/meson.build  |   6 +
 iwyu-mappings.imp |   2 +
 3 files changed, 302 insertions(+)
 create mode 100644 apps/MatMul.cpp

diff --git a/apps/MatMul.cpp b/apps/MatMul.cpp
new file mode 100644
index 00000000..b36c3384
--- /dev/null
+++ b/apps/MatMul.cpp
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// Copyright © 2024 Florian Schmaus
+#include <array>
+#include <boost/numeric/ublas/io.hpp>
+#include <boost/numeric/ublas/matrix.hpp>
+#include <boost/program_options.hpp>
+#include <chrono>
+#include <cstdint>
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <random>
+#include <string>
+#include <thread>
+#include <type_traits>
+#include <vector>
+
+#include "Common.hpp"
+#include "CountingPrivateSemaphore.hpp"
+#include "Fiber.hpp"
+#include "Runtime.hpp"
+#include "Semaphore.hpp"
+#include "emper.hpp"
+
+namespace po = boost::program_options;
+
+template <class T>
+using matrix = boost::numeric::ublas::matrix<T>;
+
+static auto sequentialMatMul(matrix<double> &a, matrix<double> &b) -> matrix<double> {
+	const auto m = a.size1();	 // number of rows of a
+	const auto n = a.size2();	 // number of columns of a
+	if (n != b.size1()) DIE_MSG("Invalid argument");
+	const auto l = b.size2();	 // number of rows of b
+
+	matrix<double> res(m, l);
+
+	for (unsigned int i = 0; i < m; ++i) {
+		for (unsigned int j = 0; j < l; ++j) {
+			res(i, j) = 0;
+			for (unsigned int k = 0; k < n; ++k) {
+				res(i, j) = res(i, j) + a(i, k) * b(k, j);
+			}
+		}
+	}
+
+	return res;
+}
+
+static auto naiveParallelMatMul(matrix<double> &a, matrix<double> &b) -> matrix<double> {
+	const auto m = a.size1();	 // number of rows of a
+	const auto n = a.size2();	 // number of columns of a
+	if (n != b.size1()) DIE_MSG("Invalid argument");
+	const auto l = b.size2();	 // number of rows of b
+
+	matrix<double> res(m, l);
+
+	CPS cps;
+
+	for (unsigned int i = 0; i < m; ++i) {
+		for (unsigned int j = 0; j < l; ++j) {
+			spawn(
+					[i, j, n, &a, &b, &res] {
+						res(i, j) = 0;
+						for (unsigned int k = 0; k < n; ++k) {
+							res(i, j) = res(i, j) + a(i, k) * b(k, j);
+						}
+					},
+					cps);
+		}
+	}
+
+	cps.wait();
+
+	return res;
+}
+
+static auto stupidParallelMatMul(matrix<double> &a, matrix<double> &b) -> matrix<double> {
+	const auto m = a.size1();	 // number of rows of a
+	const auto n = a.size2();	 // number of columns of a
+	if (n != b.size1()) DIE_MSG("Invalid argument");
+	const auto l = b.size2();	 // number of rows of b
+
+	matrix<double> res(m, l);
+
+	CPS outerCps;
+
+	for (unsigned int i = 0; i < m; ++i) {
+		for (unsigned int j = 0; j < l; ++j) {
+			outerCps.incrementCounterByOne();
+
+			auto *innerCps = new CPS();
+			auto *sem = new emper::Semaphore(1);
+			auto *scalprod = new double;
+
+			for (unsigned int k = 0; k < n; ++k) {
+				spawn(
+						[i, j, k, sem, scalprod, &a, &b] {
+							double product = a(i, k) * b(k, j);
+
+							sem->acquire();
+							*scalprod += product;
+							sem->release();
+						},
+						*innerCps);
+			}
+
+			async([i, j, innerCps, sem, scalprod, &res, &outerCps] {
+				innerCps->wait();
+
+				res(i, j) = *scalprod;
+
+				outerCps.signal();
+
+				delete innerCps;
+				delete sem;
+				delete scalprod;
+			});
+		}
+	}
+
+	outerCps.wait();
+
+	return res;
+}
+
+template <class G>
+static auto createRandomMatrix(unsigned int m_dim, unsigned int n_dim, float zero_probability,
+															 G &g) -> matrix<double> {
+	double min = 0;
+	double max = 1'000'000'000'000'000;
+	std::uniform_real_distribution<> value_distribution(min, max);
+
+	std::bernoulli_distribution zero_distribution(zero_probability);
+
+	matrix<double> res(m_dim, n_dim);
+	for (unsigned int m = 0; m < m_dim; ++m) {
+		for (unsigned int n = 0; n < n_dim; ++n) {
+			double value;
+
+			if (zero_distribution(g))
+				value = 0;
+			else
+				value = value_distribution(g);
+
+			res(m, n) = value;
+		}
+	}
+
+	return res;
+}
+
+using ParMatMul = struct {
+	std::string name;
+	matrix<double> (*mul)(matrix<double> &a, matrix<double> &b);
+};
+
+namespace chrn = std::chrono;
+using Clock = std::chrono::high_resolution_clock;
+
+static auto matmul(const po::variables_map &vm) -> int {
+	const auto nthreads = vm["nthreads"].as<unsigned int>();
+	const auto seed = vm["seed"].as<std::uint_fast64_t>();
+	const auto m = vm["m"].as<unsigned int>();
+	const auto n = vm["n"].as<unsigned int>();
+	const auto l = vm["l"].as<unsigned int>();
+	const auto par_mat_muls = vm["parMatMuls"].as<unsigned int>();
+	const auto iters = vm["iters"].as<unsigned int>();
+	const auto zero_probability = vm["zeroProb"].as<float>();
+	const auto verify = vm["verify"].as<bool>();
+
+	Runtime runtime(nthreads);
+	std::mt19937_64 rng(seed);
+
+	std::vector<matrix<double>> a_mtxs, b_mtxs, seq_results;
+
+	for (unsigned int i = 0; i < par_mat_muls; ++i) {
+		auto a = createRandomMatrix(m, n, zero_probability, rng);
+		a_mtxs.push_back(a);
+
+		auto b = createRandomMatrix(n, l, zero_probability, rng);
+		b_mtxs.push_back(b);
+
+		auto seq_res = sequentialMatMul(a_mtxs[i], b_mtxs[i]);
+		seq_results.push_back(seq_res);
+	}
+
+	auto par_mat_mul_impls = std::to_array<ParMatMul>({
+			{"naive", naiveParallelMatMul},
+			{"stupid", stupidParallelMatMul},
+	});
+
+	int exitStatus = EXIT_SUCCESS;
+	Fiber *alphaFiber = Fiber::from([&] {
+		for (auto impl : par_mat_mul_impls) {
+			std::vector<chrn::milliseconds> durations(iters);
+			std::cout << "Start " << impl.name << " with " << iters << " iterations: " << std::flush;
+
+			for (unsigned int iter = 0; iter < iters; ++iter) {
+				std::vector<matrix<double>> results(par_mat_muls);
+				CPS cps(par_mat_muls);
+
+				chrn::time_point<Clock> start = Clock::now();
+				for (unsigned int i = 0; i < par_mat_muls; ++i) {
+					async([i, &cps, &impl, &results, &a_mtxs, &b_mtxs] {
+						auto a = a_mtxs[i];
+						auto b = b_mtxs[i];
+
+						auto res = impl.mul(a, b);
+
+						results[i] = res;
+						cps.signal();
+					});
+				}
+
+				cps.wait();
+				chrn::time_point<Clock> end = Clock::now();
+
+				std::cout << "." << std::flush;
+
+				auto duration = chrn::duration_cast<chrn::milliseconds>(end - start);
+				durations[iter] = duration;
+
+				if (!verify) continue;
+				for (unsigned int i = 0; i < par_mat_muls; ++i) {
+					if (boost::numeric::ublas::detail::equals(seq_results[i], results[i], 1.e-6, 0.))
+						continue;
+
+					// clang-format off
+					std::cerr
+						<< std::endl
+						<< "ERROR: Matrix " << i << " produced by " << impl.name << " incorrect" << std::endl
+						<< "Expected matrix: " << std::endl << seq_results[i] << std::endl
+						<< "Provided matrix: " << std::endl << results[i] << std::endl
+						;
+					// clang-format on
+
+					exitStatus = EXIT_FAILURE;
+					runtime.initiateTermination();
+					return;
+				}
+			}
+
+			std::cout << std::endl << "Implementation '" << impl.name << "' took ";
+			for (unsigned int i = 0; i < iters; ++i) {
+				std::cout << durations[i];
+				if (i + 1 != iters) std::cout << ", ";
+			}
+			std::cout << ";" << std::endl;
+		}
+
+		runtime.initiateTermination();
+	});
+	runtime.scheduleFromAnywhere(*alphaFiber);
+	runtime.waitUntilFinished();
+
+	return exitStatus;
+}
+
+auto main(int argc, char *argv[]) -> int {
+	po::options_description desc("Allowed options");
+	// clang-format off
+	desc.add_options()
+		("help", "Show help")
+		("nthreads", po::value<unsigned int>()->default_value(std::thread::hardware_concurrency()), "Number of worker threads used by EMPER's runtime system")
+		("seed", po::value<std::uint_fast64_t>()->default_value(20180922), "Initial seed of the random number generator")
+		("m", po::value<unsigned int>()->default_value(10), "Number of rows of the first matrix and of the result matrix")
+		("n", po::value<unsigned int>()->default_value(10), "Number of columns of the first matrix and of rows of the second matrix")
+		("l", po::value<unsigned int>()->default_value(10), "Number of columns of the second matrix and of the result matrix")
+		("parMatMuls", po::value<unsigned int>()->default_value(5), "Number of matrix multiplications to perform in parallel")
+		("iters", po::value<unsigned int>()->default_value(3), "Number of benchmark iterations")
+		("zeroProb", po::value<float>()->default_value(0.01), "Probaility of generating a zero when initializing the matrices")
+		("verify", po::value<bool>()->default_value(true), "Verify the result of the matrix multiplication")
+		;
+	// clang-format on
+
+	auto parse_result = po::command_line_parser(argc, argv).options(desc).run();
+
+	po::variables_map vm;
+	po::store(parse_result, vm);
+	po::notify(vm);
+
+	if (vm.count("help")) {
+		std::cout << desc << std::endl;
+		return EXIT_SUCCESS;
+	}
+
+	try {
+		return matmul(vm);
+	} catch (std::exception &e) {
+		std::cerr << e.what() << std::endl;
+		return EXIT_FAILURE;
+	}
+}
diff --git a/apps/meson.build b/apps/meson.build
index 1ab32678..54be3b49 100644
--- a/apps/meson.build
+++ b/apps/meson.build
@@ -60,6 +60,12 @@ if cpp_can_link_with_boost_program_options
     'FibChildStealing.cpp',
     dependencies: [emper_dep, boost_program_options_dep],
   )
+
+  mat_mul_exe = executable(
+    'mat-mul',
+    'MatMul.cpp',
+    dependencies: [emper_dep, boost_dep, boost_program_options_dep],
+  )
 endif
 
 subdir('fsearch')
diff --git a/iwyu-mappings.imp b/iwyu-mappings.imp
index b681f6ac..9eb3d503 100644
--- a/iwyu-mappings.imp
+++ b/iwyu-mappings.imp
@@ -1,5 +1,6 @@
 [
 	{ include: ["<bits/chrono.h>", "private", "<chrono>", "public"]},
+	{ include: ["<bits/chrono_io.h>", "private", "<chrono>", "public"]},
 	{ include: ["<bits/getopt_core.h>", "private", "<unistd.h>", "public"] },
 	{ include: ["@<gtest/.*>", "private", "<gtest/gtest.h>", "public"] },
 	{ include: ["<urcu/map/urcu-memb.h>", "private", "<urcu.h>", "public"] },
@@ -13,6 +14,7 @@
 	{ include: ["<bits/chrono.h>", "private", "<chrono>", "public" ] },
 	{ include: ["<boost/detail/basic_pointerbuf.hpp>", "private", "<boost/program_options.hpp>", "public"], },
 	{ include: ["<boost/lexical_cast/bad_lexical_cast.hpp>", "private", "<boost/program_options.hpp>", "public"], },
+	{ include: ["<boost/numeric/ublas/detail/matrix_assign.hpp>", "private", "<boost/numeric/ublas/matrix.hpp>", "public"], },
 	{ include: ["<boost/program_options/detail/parsers.hpp>", "private", "<boost/program_options.hpp>", "public"], },
 	{ include: ["<boost/program_options/detail/value_semantic.hpp>", "private", "<boost/program_options.hpp>", "public"], },
 	{ include: ["<boost/program_options/errors.hpp>", "private", "<boost/program_options.hpp>", "public"], },
-- 
GitLab