From eb6cfc3fd40cd6d309312b11f9621020bbcb2dd1 Mon Sep 17 00:00:00 2001 From: Florian Schmaus <flow@cs.fau.de> Date: Tue, 11 Jun 2024 13:05:56 +0200 Subject: [PATCH] Add apps/MatMul.cpp --- apps/MatMul.cpp | 294 ++++++++++++++++++++++++++++++++++++++++++++++ apps/meson.build | 6 + iwyu-mappings.imp | 2 + 3 files changed, 302 insertions(+) create mode 100644 apps/MatMul.cpp diff --git a/apps/MatMul.cpp b/apps/MatMul.cpp new file mode 100644 index 00000000..b36c3384 --- /dev/null +++ b/apps/MatMul.cpp @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later +// Copyright © 2024 Florian Schmaus +#include <array> +#include <boost/numeric/ublas/io.hpp> +#include <boost/numeric/ublas/matrix.hpp> +#include <boost/program_options.hpp> +#include <chrono> +#include <cstdint> +#include <cstdlib> +#include <exception> +#include <iostream> +#include <random> +#include <string> +#include <thread> +#include <type_traits> +#include <vector> + +#include "Common.hpp" +#include "CountingPrivateSemaphore.hpp" +#include "Fiber.hpp" +#include "Runtime.hpp" +#include "Semaphore.hpp" +#include "emper.hpp" + +namespace po = boost::program_options; + +template <class T> +using matrix = boost::numeric::ublas::matrix<T>; + +static auto sequentialMatMul(matrix<double> &a, matrix<double> &b) -> matrix<double> { + const auto m = a.size1(); // number of rows of a + const auto n = a.size2(); // number of columns of a + if (n != b.size1()) DIE_MSG("Invalid argument"); + const auto l = b.size2(); // number of rows of b + + matrix<double> res(m, l); + + for (unsigned int i = 0; i < m; ++i) { + for (unsigned int j = 0; j < l; ++j) { + res(i, j) = 0; + for (unsigned int k = 0; k < n; ++k) { + res(i, j) = res(i, j) + a(i, k) * b(k, j); + } + } + } + + return res; +} + +static auto naiveParallelMatMul(matrix<double> &a, matrix<double> &b) -> matrix<double> { + const auto m = a.size1(); // number of rows of a + const auto n = a.size2(); // number of columns of a + if (n != b.size1()) DIE_MSG("Invalid argument"); + const auto l = b.size2(); // number of rows of b + + matrix<double> res(m, l); + + CPS cps; + + for (unsigned int i = 0; i < m; ++i) { + for (unsigned int j = 0; j < l; ++j) { + spawn( + [i, j, n, &a, &b, &res] { + res(i, j) = 0; + for (unsigned int k = 0; k < n; ++k) { + res(i, j) = res(i, j) + a(i, k) * b(k, j); + } + }, + cps); + } + } + + cps.wait(); + + return res; +} + +static auto stupidParallelMatMul(matrix<double> &a, matrix<double> &b) -> matrix<double> { + const auto m = a.size1(); // number of rows of a + const auto n = a.size2(); // number of columns of a + if (n != b.size1()) DIE_MSG("Invalid argument"); + const auto l = b.size2(); // number of rows of b + + matrix<double> res(m, l); + + CPS outerCps; + + for (unsigned int i = 0; i < m; ++i) { + for (unsigned int j = 0; j < l; ++j) { + outerCps.incrementCounterByOne(); + + auto *innerCps = new CPS(); + auto *sem = new emper::Semaphore(1); + auto *scalprod = new double; + + for (unsigned int k = 0; k < n; ++k) { + spawn( + [i, j, k, sem, scalprod, &a, &b] { + double product = a(i, k) * b(k, j); + + sem->acquire(); + *scalprod += product; + sem->release(); + }, + *innerCps); + } + + async([i, j, innerCps, sem, scalprod, &res, &outerCps] { + innerCps->wait(); + + res(i, j) = *scalprod; + + outerCps.signal(); + + delete innerCps; + delete sem; + delete scalprod; + }); + } + } + + outerCps.wait(); + + return res; +} + +template <class G> +static auto createRandomMatrix(unsigned int m_dim, unsigned int n_dim, float zero_probability, + G &g) -> matrix<double> { + double min = 0; + double max = 1'000'000'000'000'000; + std::uniform_real_distribution<> value_distribution(min, max); + + std::bernoulli_distribution zero_distribution(zero_probability); + + matrix<double> res(m_dim, n_dim); + for (unsigned int m = 0; m < m_dim; ++m) { + for (unsigned int n = 0; n < n_dim; ++n) { + double value; + + if (zero_distribution(g)) + value = 0; + else + value = value_distribution(g); + + res(m, n) = value; + } + } + + return res; +} + +using ParMatMul = struct { + std::string name; + matrix<double> (*mul)(matrix<double> &a, matrix<double> &b); +}; + +namespace chrn = std::chrono; +using Clock = std::chrono::high_resolution_clock; + +static auto matmul(const po::variables_map &vm) -> int { + const auto nthreads = vm["nthreads"].as<unsigned int>(); + const auto seed = vm["seed"].as<std::uint_fast64_t>(); + const auto m = vm["m"].as<unsigned int>(); + const auto n = vm["n"].as<unsigned int>(); + const auto l = vm["l"].as<unsigned int>(); + const auto par_mat_muls = vm["parMatMuls"].as<unsigned int>(); + const auto iters = vm["iters"].as<unsigned int>(); + const auto zero_probability = vm["zeroProb"].as<float>(); + const auto verify = vm["verify"].as<bool>(); + + Runtime runtime(nthreads); + std::mt19937_64 rng(seed); + + std::vector<matrix<double>> a_mtxs, b_mtxs, seq_results; + + for (unsigned int i = 0; i < par_mat_muls; ++i) { + auto a = createRandomMatrix(m, n, zero_probability, rng); + a_mtxs.push_back(a); + + auto b = createRandomMatrix(n, l, zero_probability, rng); + b_mtxs.push_back(b); + + auto seq_res = sequentialMatMul(a_mtxs[i], b_mtxs[i]); + seq_results.push_back(seq_res); + } + + auto par_mat_mul_impls = std::to_array<ParMatMul>({ + {"naive", naiveParallelMatMul}, + {"stupid", stupidParallelMatMul}, + }); + + int exitStatus = EXIT_SUCCESS; + Fiber *alphaFiber = Fiber::from([&] { + for (auto impl : par_mat_mul_impls) { + std::vector<chrn::milliseconds> durations(iters); + std::cout << "Start " << impl.name << " with " << iters << " iterations: " << std::flush; + + for (unsigned int iter = 0; iter < iters; ++iter) { + std::vector<matrix<double>> results(par_mat_muls); + CPS cps(par_mat_muls); + + chrn::time_point<Clock> start = Clock::now(); + for (unsigned int i = 0; i < par_mat_muls; ++i) { + async([i, &cps, &impl, &results, &a_mtxs, &b_mtxs] { + auto a = a_mtxs[i]; + auto b = b_mtxs[i]; + + auto res = impl.mul(a, b); + + results[i] = res; + cps.signal(); + }); + } + + cps.wait(); + chrn::time_point<Clock> end = Clock::now(); + + std::cout << "." << std::flush; + + auto duration = chrn::duration_cast<chrn::milliseconds>(end - start); + durations[iter] = duration; + + if (!verify) continue; + for (unsigned int i = 0; i < par_mat_muls; ++i) { + if (boost::numeric::ublas::detail::equals(seq_results[i], results[i], 1.e-6, 0.)) + continue; + + // clang-format off + std::cerr + << std::endl + << "ERROR: Matrix " << i << " produced by " << impl.name << " incorrect" << std::endl + << "Expected matrix: " << std::endl << seq_results[i] << std::endl + << "Provided matrix: " << std::endl << results[i] << std::endl + ; + // clang-format on + + exitStatus = EXIT_FAILURE; + runtime.initiateTermination(); + return; + } + } + + std::cout << std::endl << "Implementation '" << impl.name << "' took "; + for (unsigned int i = 0; i < iters; ++i) { + std::cout << durations[i]; + if (i + 1 != iters) std::cout << ", "; + } + std::cout << ";" << std::endl; + } + + runtime.initiateTermination(); + }); + runtime.scheduleFromAnywhere(*alphaFiber); + runtime.waitUntilFinished(); + + return exitStatus; +} + +auto main(int argc, char *argv[]) -> int { + po::options_description desc("Allowed options"); + // clang-format off + desc.add_options() + ("help", "Show help") + ("nthreads", po::value<unsigned int>()->default_value(std::thread::hardware_concurrency()), "Number of worker threads used by EMPER's runtime system") + ("seed", po::value<std::uint_fast64_t>()->default_value(20180922), "Initial seed of the random number generator") + ("m", po::value<unsigned int>()->default_value(10), "Number of rows of the first matrix and of the result matrix") + ("n", po::value<unsigned int>()->default_value(10), "Number of columns of the first matrix and of rows of the second matrix") + ("l", po::value<unsigned int>()->default_value(10), "Number of columns of the second matrix and of the result matrix") + ("parMatMuls", po::value<unsigned int>()->default_value(5), "Number of matrix multiplications to perform in parallel") + ("iters", po::value<unsigned int>()->default_value(3), "Number of benchmark iterations") + ("zeroProb", po::value<float>()->default_value(0.01), "Probaility of generating a zero when initializing the matrices") + ("verify", po::value<bool>()->default_value(true), "Verify the result of the matrix multiplication") + ; + // clang-format on + + auto parse_result = po::command_line_parser(argc, argv).options(desc).run(); + + po::variables_map vm; + po::store(parse_result, vm); + po::notify(vm); + + if (vm.count("help")) { + std::cout << desc << std::endl; + return EXIT_SUCCESS; + } + + try { + return matmul(vm); + } catch (std::exception &e) { + std::cerr << e.what() << std::endl; + return EXIT_FAILURE; + } +} diff --git a/apps/meson.build b/apps/meson.build index 1ab32678..54be3b49 100644 --- a/apps/meson.build +++ b/apps/meson.build @@ -60,6 +60,12 @@ if cpp_can_link_with_boost_program_options 'FibChildStealing.cpp', dependencies: [emper_dep, boost_program_options_dep], ) + + mat_mul_exe = executable( + 'mat-mul', + 'MatMul.cpp', + dependencies: [emper_dep, boost_dep, boost_program_options_dep], + ) endif subdir('fsearch') diff --git a/iwyu-mappings.imp b/iwyu-mappings.imp index b681f6ac..9eb3d503 100644 --- a/iwyu-mappings.imp +++ b/iwyu-mappings.imp @@ -1,5 +1,6 @@ [ { include: ["<bits/chrono.h>", "private", "<chrono>", "public"]}, + { include: ["<bits/chrono_io.h>", "private", "<chrono>", "public"]}, { include: ["<bits/getopt_core.h>", "private", "<unistd.h>", "public"] }, { include: ["@<gtest/.*>", "private", "<gtest/gtest.h>", "public"] }, { include: ["<urcu/map/urcu-memb.h>", "private", "<urcu.h>", "public"] }, @@ -13,6 +14,7 @@ { include: ["<bits/chrono.h>", "private", "<chrono>", "public" ] }, { include: ["<boost/detail/basic_pointerbuf.hpp>", "private", "<boost/program_options.hpp>", "public"], }, { include: ["<boost/lexical_cast/bad_lexical_cast.hpp>", "private", "<boost/program_options.hpp>", "public"], }, + { include: ["<boost/numeric/ublas/detail/matrix_assign.hpp>", "private", "<boost/numeric/ublas/matrix.hpp>", "public"], }, { include: ["<boost/program_options/detail/parsers.hpp>", "private", "<boost/program_options.hpp>", "public"], }, { include: ["<boost/program_options/detail/value_semantic.hpp>", "private", "<boost/program_options.hpp>", "public"], }, { include: ["<boost/program_options/errors.hpp>", "private", "<boost/program_options.hpp>", "public"], }, -- GitLab