// SPDX-License-Identifier: LGPL-3.0-or-later
// Copyright © 2020 Florian Schmaus
#include <chrono>		 // for nanoseconds, time_point, dur...
#include <cstdint>	 // for uint8_t, uint64_t
#include <cstdlib>	 // for EXIT_SUCCESS
#include <iostream>	 // for operator<<, basic_ostream
#include <memory>		 // for unique_ptr
#include <thread>		 // for thread
#include <vector>		 // for vector

#include "BinaryPrivateSemaphore.hpp"		 // for BPS
#include "CountingPrivateSemaphore.hpp"	 // for CPS
#include "Fiber.hpp"										 // for Fiber
#include "PrivateSemaphore.hpp"					 // for PS
#include "Runtime.hpp"									 // for Runtime
#include "emper-common.h"								 // for UNUSED_ARG
#include "lib/DebugUtil.hpp"						 // for enableStacktraceOnAborts

// Distance, in bytes, between the flag bytes written by the individual threads/fibers in the
// non-recursive benchmarks below. NOTE(review): 64 is presumably the cache line size of the
// target machine (keeping concurrent writers on separate lines) - confirm for other targets.
#define CACHE_LINE_SIZE 64

// Thread body of the recursive thread-spawning benchmark.
//
// Unless the recursion limit has been reached, starts `width` child threads, each running
// this function one level deeper, and joins all of them before returning.
//
// depth:         level at which the recursion stops; a thread at (or past) it spawns nothing
// width:         number of child threads started by every thread above `depth`
// current_depth: level of the calling thread
static void spawnALotThreadsRecursiveTFun(unsigned int depth, unsigned int width,
																					unsigned int current_depth) {
	// `>=` instead of `==`: a start level past `depth` must terminate immediately rather than
	// recurse without bound. This mirrors the `current_depth < depth` test of the fiber variant.
	if (current_depth >= depth) return;

	const unsigned int new_depth = current_depth + 1;

	// The vector owns the thread handles, so their storage is released on every exit path.
	std::vector<std::thread> threads;
	threads.reserve(width);
	for (unsigned int i = 0; i < width; ++i) {
		threads.emplace_back(spawnALotThreadsRecursiveTFun, depth, width, new_depth);
	}

	// Every child has to be joined before its std::thread object is destroyed.
	for (std::thread& thread : threads) {
		thread.join();
	}
}

static void spawnALotThreadsRecursive(unsigned int depth, unsigned int width) {
	std::thread thread(spawnALotThreadsRecursiveTFun, depth, width, 0);
	thread.join();
}

// Non-recursive thread benchmark: starts `count` threads, each of which sets one flag byte of
// its own, and joins all of them. The flag bytes are CACHE_LINE_SIZE bytes apart.
static void spawnALotThreadsNonRecursive(uint64_t count) {
	// Owned by a unique_ptr so the buffer is released on every exit path. The bytes are
	// deliberately left uninitialized: main() times this function, and each byte that is used
	// gets written by exactly one thread.
	const std::unique_ptr<uint8_t[]> flagStorage(new uint8_t[count * CACHE_LINE_SIZE]);
	uint8_t* flags = flagStorage.get();

	std::vector<std::thread> threads;
	threads.reserve(count);
	for (uint64_t i = 0; i < count; ++i) {
		// The pointer is captured by value; no thread refers to a local of this frame.
		threads.emplace_back([flags, i] { flags[i * CACHE_LINE_SIZE] = 1; });
	}

	// All threads are joined before `threads` and `flagStorage` go out of scope.
	for (std::thread& thread : threads) {
		thread.join();
	}
}

struct SpawnALotFibersData {
	Runtime& runtime;
	const unsigned int depth;
	const unsigned int width;
	PS& ps;
	unsigned int current_depth;

	SpawnALotFibersData(SpawnALotFibersData* oldData, PS& ps)
			: runtime(oldData->runtime),
				depth(oldData->depth),
				width(oldData->width),
				ps(ps),
				current_depth(oldData->current_depth + 1) {}

	SpawnALotFibersData(Runtime& runtime, unsigned int depth, unsigned int width, PS& ps)
			: runtime(runtime), depth(depth), width(width), ps(ps), current_depth(0) {}
};

static void spawnALotFibersRecursiveFFun(void* dataPtr) {
	auto* data = (SpawnALotFibersData*)dataPtr;
	if (data->current_depth < data->depth) {
		CPS childSem(data->width);
		SpawnALotFibersData newData(data, childSem);

		for (unsigned int i = 0; i < data->width; ++i) {
			Fiber* fiber = Fiber::from(spawnALotFibersRecursiveFFun, (void*)&newData);
			data->runtime.schedule(*fiber);
		}

		childSem.wait();
	}

	data->ps.signal();
}

// Entry point of the recursive fiber benchmark: schedules the root fiber on `runtime` and
// waits on a binary semaphore until that fiber signals completion.
static void spawnALotFibersRecursive(Runtime& runtime, unsigned int depth, unsigned int width) {
	BPS rootDone;
	SpawnALotFibersData rootData(runtime, depth, width, rootDone);

	runtime.schedule(*Fiber::from(spawnALotFibersRecursiveFFun, static_cast<void*>(&rootData)));

	rootDone.wait();
}

// Non-recursive fiber benchmark: schedules `count` fibers on `runtime`, each of which sets one
// flag byte of its own and signals a counting semaphore, then waits on that semaphore. The
// flag bytes are CACHE_LINE_SIZE bytes apart.
static void spawnALotFibersNonRecursive(Runtime& runtime, uint64_t count) {
	// Owned by a unique_ptr so the buffer is released even if creating or scheduling a fiber
	// throws. The bytes are deliberately left uninitialized: main() times this function, and
	// each byte that is used gets written by exactly one fiber.
	const std::unique_ptr<uint8_t[]> flagStorage(new uint8_t[count * CACHE_LINE_SIZE]);
	uint8_t* flags = flagStorage.get();
	CPS cps(count);

	for (uint64_t i = 0; i < count; ++i) {
		Fiber* fiber = Fiber::from([i, &cps, flags] {
			flags[i * CACHE_LINE_SIZE] = 1;
			cps.signal();
		});
		runtime.schedule(*fiber);
	}

	// Returning only after the wait keeps `cps` and the flag buffer alive for the fibers.
	cps.wait();
}

// Benchmark driver. Times the two thread-based benchmarks first, then creates a Runtime and
// times the two fiber-based benchmarks from inside it. Every duration is printed to stdout in
// nanoseconds; the non-recursive variants additionally print the per-thread/per-fiber average.
auto main(UNUSED_ARG int argc, UNUSED_ARG char* argv[]) -> int {
	using Clock = std::chrono::high_resolution_clock;
	using Nanos = std::chrono::nanoseconds;

	enableStacktraceOnAborts();

	const unsigned int depth = 10;
	const unsigned int width = 2;
	const uint64_t count = 1024;

	{
		const Clock::time_point begin = Clock::now();
		spawnALotThreadsRecursive(depth, width);
		const Clock::time_point finish = Clock::now();
		const Nanos elapsed = std::chrono::duration_cast<Nanos>(finish - begin);
		std::cout << "Spawn a lot of threads recursive (depth=" << depth << ", width=" << width
							<< ") took " << std::endl
							<< elapsed.count() << " ns" << std::endl;
	}

	{
		const Clock::time_point begin = Clock::now();
		spawnALotThreadsNonRecursive(count);
		const Clock::time_point finish = Clock::now();
		const Nanos elapsed = std::chrono::duration_cast<Nanos>(finish - begin);
		std::cout << "Spawn a lot of threads non-recursive (count=" << count << ") took " << std::endl
							<< elapsed.count() << " ns"
							<< " (" << elapsed.count() / count << " ns/thread)" << std::endl;
	}

	// The runtime is created only after the thread benchmarks have finished.
	Runtime runtime;

	runtime.executeAndWait([&] {
		{
			const Clock::time_point begin = Clock::now();
			spawnALotFibersRecursive(runtime, depth, width);
			const Clock::time_point finish = Clock::now();
			const Nanos elapsed = std::chrono::duration_cast<Nanos>(finish - begin);
			std::cout << "Spawn a lot of fibers recursive (depth=" << depth << ", width=" << width
								<< ") took " << std::endl
								<< elapsed.count() << " ns" << std::endl;
		}

		{
			const Clock::time_point begin = Clock::now();
			spawnALotFibersNonRecursive(runtime, count);
			const Clock::time_point finish = Clock::now();
			const Nanos elapsed = std::chrono::duration_cast<Nanos>(finish - begin);
			std::cout << "Spawn a lot of fibers non-recursive (count=" << count << ") took " << std::endl
								<< elapsed.count() << " ns"
								<< " (" << elapsed.count() / count << " ns/fiber)" << std::endl;
		}
	});

	return EXIT_SUCCESS;
}