Newer
Older
// SPDX-License-Identifier: LGPL-3.0-or-later
// Copyright © 2020 Florian Schmaus
#include <unistd.h> // for getopt, optarg
#include <algorithm> // for generate
#include <chrono> // for microseconds, high_resol...
#include <cstdint> // for uint8_t, UINT8_MAX
#include <cstdlib> // for abort, exit, EXIT_SUCCESS
#include <iostream> // for operator<<, basic_ostream
#include <random> // for mt19937, uniform_int_dis...
#include <string> // for string, operator<<, oper...
#include "CountingPrivateSemaphore.hpp" // for CPS
#include "Debug.hpp" // for DBG
#include "Fiber.hpp" // for Fiber, Fiber::NOT_AFFINE
#include "PrivateSemaphore.hpp" // for PS
#include "Runtime.hpp" // for Runtime
#include "emper-common.h" // for workeraffinity_t, UNUSED...
#include "lib/DebugUtil.hpp" // for enableStacktraceOnAborts
#include "strategies/laws/LawsStrategy.hpp" // for LawsStrategy, LawsStrate...
// Hard-coded cache geometry the benchmark is tuned for. In the code visible
// here only L1_DCACHE_SIZE is referenced (it sizes each fiber's working set
// and offsets the access pattern); the other values are currently unused in
// this part of the file.
#define L1_CACHE_LINE_SIZE 64 // 64 Bytes
#define L1_DCACHE_SIZE (32 * 1024) // 32 KiB
#define L2_DCACHE_SIZE (256 * 1024) // 256 KiB
#define L3_DCACHE_SIZE (4096 * 1024) // 4 MiB
// Uncomment to compile in the per-fiber metadata recording that is guarded by
// "#ifdef FIBER_METADATA" throughout this file.
//#define FIBER_METADATA
// Uniform integer distribution over [0, UINT8_MAX]; State::getNextRandom()
// draws from it using the State's own random engine.
static std::uniform_int_distribution<> UINT8_UNIFORM_DISTRIBUTION(0, UINT8_MAX);
// Per-(fiber, round) record that the fiber body fills in when FIBER_METADATA
// is defined: which worker executed it, the affinity it ran with, and the
// timestamps taken at the beginning and at the end of its work.
struct FiberMetadata {
	// Id of the worker the fiber ran on.
	workerid_t workerId;
	// Affinity of the fiber at the time it ran.
	workeraffinity_t currentAffinity;
	// Taken right when the fiber body starts.
	std::chrono::high_resolution_clock::time_point start;
	// Taken right after the fiber finished working on its data.
	std::chrono::high_resolution_clock::time_point end;
};
struct State {
const unsigned int fiberCount;
const unsigned int bytesPerFiber;
const unsigned int rounds;
Runtime& runtime;
std::mt19937 randomGenerator;
// TODO: Should the affinities in the affinity array be cache line
// aligned to avoid false sharing?
workeraffinity_t* affinity;
uint8_t* data;
FiberMetadata* fiberMetadata;
// std::map<unsigned int, std::vector<FiberMetadata>> fiberMetadata;
// NOLINTNEXTLINE(cert-msc32-c,cert-msc51-cpp)
State(Runtime& runtime, unsigned int fiberCount, unsigned int bytesPerFiber, unsigned int rounds,
unsigned int seed)
: fiberCount(fiberCount), bytesPerFiber(bytesPerFiber), rounds(rounds), runtime(runtime) {
affinity = new workeraffinity_t[fiberCount];
for (unsigned int i = 0; i < fiberCount; ++i) {
affinity[i] = Fiber::NOT_AFFINE;
}
#ifdef FIBER_METADATA
fiberMetadata = new FiberMetadata[fiberCount * rounds];
#endif
std::generate(data, data + fiberCount, [this] { return getNextRandom(); });
}
~State() {
delete[] affinity;
delete[] data;
#ifdef FIBER_METADATA
delete[] fiberMetadata;
auto getNextRandom() -> uint8_t { return UINT8_UNIFORM_DISTRIBUTION(randomGenerator); }
[[nodiscard]] auto getFiberMetadata(unsigned int fiberNum, unsigned int roundNum) const
-> FiberMetadata* {
return fiberMetadata + (fiberNum * fiberCount) + roundNum;
}
};
// Argument bundle handed to one benchmark fiber through a void pointer.
// One instance per fiber is filled in by performRound() for every round.
struct FiberArgs {
	// Start of this fiber's slice of State::data.
	uint8_t* fiberData = nullptr;
	// Random byte of the current round; added to every element the fiber touches.
	uint8_t roundData = 0;
	// Semaphore the fiber signals once it has finished.
	PS* ps = nullptr;
	// TODO: Check if this member is still needed
	// (the fiber body currently reads bytesPerFiber through it).
	State* state = nullptr;
#ifdef FIBER_METADATA
	// Record the fiber writes its worker id, affinity and timestamps into.
	FiberMetadata* fiberMetadata = nullptr;
#endif
};
// Runs a single benchmark round.
//
// Draws one random byte for the round, then creates and schedules
// state.fiberCount fibers. Fiber i repeatedly walks its own slice of
// state.data (bytesPerFiber bytes) and mixes the round byte into it, then
// signals the round's counting semaphore. The function waits on that
// semaphore before releasing the per-fiber argument array, so the arguments
// stay alive while the fibers use them (presumably CPS::wait() returns only
// after fiberCount signals - confirm against CountingPrivateSemaphore).
//
// state - benchmark state; its data and affinity arrays are used by the fibers.
// round - index of the round; only used for debug output and, with
//         FIBER_METADATA, to select the metadata records.
static void performRound(State& state,
#ifndef FIBER_METADATA
												 UNUSED_ARG
#endif
												 unsigned int round) {
	const uint8_t roundData = state.getNextRandom();

	CPS cps(state.fiberCount);

	auto* fiberArgs = new FiberArgs[state.fiberCount];

	DBG("Starting round " << round);
	for (unsigned int i = 0; i < state.fiberCount; ++i) {
		fiberArgs[i].fiberData = state.data + (i * state.bytesPerFiber);
		fiberArgs[i].roundData = roundData;
		fiberArgs[i].ps = &cps;
		fiberArgs[i].state = &state;
#ifdef FIBER_METADATA
		fiberArgs[i].fiberMetadata = state.getFiberMetadata(i, round);
#endif

		Fiber* fiber = Fiber::from(
				[](void* fiberArgsPtr) {
					// The lambda captures nothing; everything it needs arrives through
					// the opaque argument pointer.
					auto* fiberArgs = static_cast<FiberArgs*>(fiberArgsPtr);
#ifdef FIBER_METADATA
					FiberMetadata* fiberMetadata = fiberArgs->fiberMetadata;
					fiberMetadata->start = std::chrono::high_resolution_clock::now();
					fiberMetadata->workerId = Runtime::getWorkerId();
					fiberMetadata->currentAffinity = Dispatcher::getCurrentFiber().getAffinity();
#endif

					uint8_t* fiberData = fiberArgs->fiberData;
					const unsigned int bytesPerFiber = fiberArgs->state->bytesPerFiber;

					// Nine passes over the slice (j runs over the last nine indices).
					// In every pass the partner element of position k is displaced by
					// L1_DCACHE_SIZE plus the current value of fiberData[j], wrapped
					// to the slice size.
					for (unsigned int j = (bytesPerFiber - 1); j > (bytesPerFiber - 10); --j) {
						for (unsigned int k = 0; k < bytesPerFiber; k++) {
							const unsigned int next = (k + L1_DCACHE_SIZE + fiberData[j]) % bytesPerFiber;
							fiberData[k] -= fiberData[next];
							fiberData[k] += fiberArgs->roundData;
							/*
							if (fiberData[i] < 128) {
								fiberData[i] += fiberArgs->roundData / 2;
							}
							if (fiberData[i] > 192) {
								fiberData[i] -= (fiberArgs->roundData * 4);
							}
							if (i == (bytesPerFiber * 0.75)) {
								if (fiberData[i] < 128) {
									break;
								}
							}
							*/
						}
					}

#ifdef FIBER_METADATA
					fiberMetadata->end = std::chrono::high_resolution_clock::now();
#endif

					fiberArgs->ps->signal();
				},
				static_cast<void*>(fiberArgs + i), state.affinity + i);
		state.runtime.schedule(*fiber);
	}

	// Must complete before fiberArgs is released below.
	cps.wait();

	DBG("Finished round " << round);

	delete[] fiberArgs;
}
// Writes every recorded FiberMetadata entry to stdout, one line per
// (fiber, round) pair in fiber-major order. The comma separated columns are:
// fiber number, round number, worker id, affinity, start timestamp and end
// timestamp (both as raw tick counts since the clock's epoch).
// Wrapped in IGNORE_UNUSED_FUNCTION / POP_DIAGNOSTIC because its only call
// site is commented out.
IGNORE_UNUSED_FUNCTION
static void printState(State& state) {
	const unsigned int totalFibers = state.fiberCount;
	const unsigned int totalRounds = state.rounds;

	for (unsigned int fiber = 0; fiber < totalFibers; ++fiber) {
		for (unsigned int round = 0; round < totalRounds; ++round) {
			const FiberMetadata* const entry = state.getFiberMetadata(fiber, round);

			std::cout << fiber << ", " << round;
			std::cout << ", " << entry->workerId << ", " << entry->currentAffinity;
			std::cout << ", " << entry->start.time_since_epoch().count();
			std::cout << ", " << entry->end.time_since_epoch().count();
			std::cout << std::endl;
		}
	}
}
POP_DIAGNOSTIC
// Executes the benchmark proper: constructs the State from the given
// parameters, runs state.rounds rounds back to back via performRound() and
// prints the wall-clock time spent in the rounds as "Inner <n> us". The State
// construction happens before the first timestamp and is therefore not part
// of the reported time.
//
// runtime       - runtime the fibers are scheduled on (forwarded to State).
// fiberCount    - fibers spawned per round.
// bytesPerFiber - bytes each fiber works on.
// rounds        - number of rounds to run.
// seed          - forwarded to the State constructor.
//
// NOTE(review): the "});" that ends this block has no matching opener in the
// text shown here and the function's own closing brace is absent. It looks
// like a line that wrapped the body in a callable passed to some call was
// lost - restore it from version control before building.
static void run(Runtime& runtime, unsigned int fiberCount, unsigned int bytesPerFiber,
unsigned int rounds, unsigned int seed) {
State state(runtime, fiberCount, bytesPerFiber, rounds, seed);
auto start = std::chrono::high_resolution_clock::now();
for (unsigned int i = 0; i < state.rounds; ++i) {
performRound(state, i);
}
auto end = std::chrono::high_resolution_clock::now();
// Report the time of the rounds only, in microseconds.
auto diff = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Inner " << diff.count() << " us" << std::endl;
// printState(state);
});
// Runtime flavour selected with the "-m" command line option.
enum RuntimeVariant {
	// "-m ws": default-constructed Runtime; reported as "W/o Locality".
	ws = 0,
	// "-m wslh": Runtime constructed with LawsStrategy::INSTANCE; reported as
	// "W Locality".
	wslh = 1,
};
// Program entry point.
//
// Parses the optional "-m <ws|wslh>" argument (default: ws), creates the
// matching Runtime, times one call to run() and prints the elapsed
// microseconds prefixed with "W/o Locality: " or "W Locality: ", then
// terminates via exit(EXIT_SUCCESS). Any unknown option or "-m" value aborts
// the process.
//
// NOTE(review): argc and argv are tagged UNUSED_ARG although both are passed
// to getopt() below.
// NOTE(review): coreCount, rounds and seed are used below but no declaration
// of them is visible in this file; both "switch (runtimeVariant)" statements
// and main itself are also never closed in the text shown. Lines appear to be
// missing - restore them from version control before building.
auto main(UNUSED_ARG int argc, UNUSED_ARG char* argv[]) -> int {
RuntimeVariant runtimeVariant = ws;
int opt;
// Only "-m" (with a mandatory argument) is accepted.
while ((opt = getopt(argc, argv, "m:")) != -1) {
std::string optargString;
switch (opt) {
case 'm':
optargString = std::string(optarg);
if (optargString == "ws") {
runtimeVariant = ws;
} else if (optargString == "wslh") {
runtimeVariant = wslh;
} else {
std::cerr << "Invalid -m argument " << optargString << std::endl;
abort();
}
break;
default:
abort();
}
}
// Each fiber works on half of the (hard-coded) L1 data cache size.
unsigned int bytesPerFiber = L1_DCACHE_SIZE / 2;
unsigned int fiberCount = coreCount * 4;
std::chrono::time_point<std::chrono::high_resolution_clock> start, end;
// The runtime is heap allocated and never deleted; the process leaves
// through exit() at the end of main.
Runtime* runtime;
switch (runtimeVariant) {
case ws:
runtime = new Runtime();
break;
case wslh:
runtime = new Runtime(LawsStrategy::INSTANCE);
break;
// The outer measurement spans the complete run() call, i.e. it includes the
// State construction that run()'s own "Inner" measurement leaves out.
start = std::chrono::high_resolution_clock::now();
run(*runtime, fiberCount, bytesPerFiber, rounds, seed);
end = std::chrono::high_resolution_clock::now();
auto diff = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::string variantName;
switch (runtimeVariant) {
case ws:
variantName = "W/o Locality: ";
break;
case wslh:
variantName = "W Locality: ";
break;
std::cout << variantName << diff.count() << " us" << std::endl;
exit(EXIT_SUCCESS);