diff --git a/eval/BFP9000.cpp b/eval/BFP9000.cpp index b9588dbe107b5fc374823a97b6c5f7880889c098..d63ea7d15170cbe7adeea7a80e4cd3c61244effe 100644 --- a/eval/BFP9000.cpp +++ b/eval/BFP9000.cpp @@ -51,11 +51,11 @@ static std::uint64_t number_of_pulses; static std::uint64_t pulse_target_sleep_ns; struct PulseTrace { - chrn::time_point<Clock> sleep_finish; std::uint64_t work_ns; chrn::time_point<Clock> work_start; chrn::time_point<Clock> work_finish; std::uint64_t sleep_ns; + chrn::time_point<Clock> sleep_start; }; class Pulser { @@ -67,6 +67,7 @@ class Pulser { public: std::vector<PulseTrace> traces; + chrn::time_point<Clock> pulse_start; private: static void busyWait(std::uint64_t work_ns) { @@ -119,9 +120,9 @@ class Pulser { if (trace) { traces = std::vector<PulseTrace>(number_of_pulses); - traces[0].sleep_finish = Clock::now(); } + pulse_start = Clock::now(); for (std::uint64_t i = 0; i < number_of_pulses; ++i) { const auto work_ns = getNanos(work_dist, remaining_work_ns, pulse_target_work_ns, i); if (trace) { @@ -135,12 +136,10 @@ class Pulser { const auto sleep_ns = getNanos(sleep_dist, remaining_sleep_ns, pulse_target_sleep_ns, i); if (trace) { traces[i].sleep_ns = sleep_ns; - auto next_i = i + 1; - if (next_i < number_of_pulses) { - traces[next_i].sleep_finish = Clock::now() + chrn::nanoseconds(sleep_ns); - } + traces[i].sleep_start = Clock::now(); } emper::nanosleep(sleep_ns); + // TODO: record actual sleep finish? remaining_sleep_ns -= sleep_ns; } } @@ -204,6 +203,7 @@ static void bfp9000() { std::optional<std::ofstream> latencies_file_strm; std::optional<std::ofstream> work_file_strm; std::optional<std::ofstream> sleep_file_strm; + std::optional<std::ofstream> csv_file_strm; if (result_dir) { auto latencies_file = *result_dir / "latencies"; latencies_file_strm = std::ofstream(latencies_file); @@ -213,27 +213,48 @@ static void bfp9000() { auto sleep_file = *result_dir / "sleep"; sleep_file_strm = std::ofstream(sleep_file); + + auto csv_file = *result_dir / "csv"; + csv_file_strm = std::ofstream(csv_file); } for (auto& pulser : pulsers) { std::uint64_t pulser_work_ns = 0; std::uint64_t pulser_sleep_ns = 0; + + auto previous_sleep_finish = pulser.pulse_start; for (auto& trace : pulser.traces) { pulser_work_ns += trace.work_ns; pulser_sleep_ns += trace.sleep_ns; - auto pulse_latency = - chrn::duration_cast<chrn::nanoseconds>(trace.work_start - trace.sleep_finish); + auto sleep_finish = previous_sleep_finish; + previous_sleep_finish = trace.work_finish + chrn::nanoseconds(trace.sleep_ns); + + auto pulse_latency = chrn::duration_cast<chrn::nanoseconds>(trace.work_start - sleep_finish); assert(pulse_latency.count()); latencies.push_back(pulse_latency); if (!result_dir) continue; + auto work_wall_clock = + chrn::duration_cast<chrn::nanoseconds>(trace.work_finish - trace.work_start); + auto work_speedup = + static_cast<double>(trace.work_ns) / static_cast<double>(work_wall_clock.count()); + // NOLINTBEGIN(bugprone-unchecked-optional-access) *latencies_file_strm << pulse_latency.count() << std::endl; *work_file_strm << trace.work_ns << std::endl; *sleep_file_strm << trace.sleep_ns << std::endl; + // clang-format off + *csv_file_strm << sleep_finish + << ", " << trace.work_start + << ", " << pulse_latency + << ", " << trace.work_finish + << ", " << work_wall_clock + << ", " << work_speedup + ; + // clang-format on // NOLINTEND(bugprone-unchecked-optional-access) }