Skip to content
Snippets Groups Projects
Commit 92495ae7 authored by Luis Gerhorst
Browse files

mbs: Save 5% total runtime and time in critical section

parent 62e4e05b
Branches
No related tags found
No related merge requests found
......@@ -813,7 +813,6 @@ config MBS
config MBS_NUM_CPUS
int "Number of MBS CPUs/cores"
default 0
range 0 4
help
Number of multiprocessing-capable cores available to the
migration-based synchronization features.
......
CONFIG_STDOUT_CONSOLE=y
# enable to use thread names
CONFIG_THREAD_NAME=y
CONFIG_TIMING_FUNCTIONS=y
# Enable migration-based synchronization
CONFIG_MBS=y
CONFIG_MBS_NUM_CPUS=1
CONFIG_TIMING_FUNCTIONS=y
CONFIG_MBS=n
CONFIG_MBS_NUM_CPUS=0
......@@ -15,12 +15,17 @@
/* scheduling priority used by each thread */
#define PRIORITY 7
/*
 * Number of inner-loop iterations per reporting round in time_mxtex(); also
 * the divisor used there to turn totals into per-iteration averages.
 * NOTE(review): ITER is defined twice below with different values. This looks
 * like the removed and the added line of a diff -- confirm which is intended.
 */
#define ITER (1000*10)
#define ITER (1000)
/*
 * Lock abstraction: the k_mxtex_* / K_MXTEX_DEFINE names map onto the
 * k_mbtex_* / K_MBTEX_DEFINE API when CONFIG_MBS is defined and onto the
 * k_mutex_* / K_MUTEX_DEFINE API otherwise, so the code below is written
 * once against the k_mxtex_* names.
 */
#ifdef CONFIG_MBS
#define K_MXTEX_DEFINE K_MBTEX_DEFINE
#define k_mxtex_lock k_mbtex_lock
#define k_mxtex_unlock k_mbtex_unlock
#else
#define K_MXTEX_DEFINE K_MUTEX_DEFINE
#define k_mxtex_lock k_mutex_lock
#define k_mxtex_unlock k_mutex_unlock
#endif
/* The lock object that time_mxtex() acquires and releases. */
K_MXTEX_DEFINE(mxtex);
......@@ -28,97 +33,101 @@ K_MXTEX_DEFINE(mxtex);
* and 64KiB) and 128KB-512KB L2 cache primarily used by the GPU (??, see BCM2835
* Datasheet). */
/*
 * Cache geometry and the array sizes derived from it.
 * NOTE(review): L1_CACHE_SIZE is defined twice below with different values.
 * This looks like the removed and the added line of a diff -- confirm which
 * one is intended.
 */
#define L1_CACHE_SIZE (1024*256)
#define L1_CACHE_SIZE (1024*128)
#define L1_CACHE_SET_ASSOC 4
#define L1_CACHELINE_SIZE (32)
#define L1_NR_CACHELINES (L1_CACHE_SIZE / L1_CACHELINE_SIZE)
/*
 * GRAN is the unit that SIZE, OTHER_SIZE and STEP are expressed in.
 * NOTE(review): GRAN is sizeof(uint32_t), but the arrays below hold uint64_t
 * elements. As written, data[] therefore occupies twice L1_CACHE_SIZE and a
 * stride of STEP elements spans twice L1_CACHELINE_SIZE -- confirm whether
 * that is intended.
 */
#define GRAN sizeof(uint32_t)
#define OTHER_SIZE ((1024*512) / GRAN)
#define SIZE ((L1_CACHE_SIZE) / GRAN)
#define STEP ((L1_CACHELINE_SIZE) / GRAN)
/*
 * Buffers touched by the workloads. They are volatile, so every read and
 * write in the loops is an actual memory access.
 */
static volatile uint64_t other_data[OTHER_SIZE] = {};
static volatile uint64_t data[SIZE] = {};
/*
 * Walks data[] at indices 0, STEP, 2*STEP, ... below SIZE, adds the visited
 * elements to `acc` and returns the updated value.
 *
 * NOTE(review): the loop body contains two accumulate statements, a plain
 * read and a read with post-increment. They look like the removed and the
 * added line of a diff -- confirm which one is meant to be live.
 */
static int workload(int acc) {
/*
 * Loop hint for Clang.
 * NOTE(review): Clang documents unroll(enable|disable|full) and
 * unroll_count(N); confirm that unroll(64) is accepted by the compiler in use.
 */
#pragma clang loop unroll(64)
for (int j = 0; j < SIZE; j += STEP) {
acc += data[j];
acc += data[j]++;
}
return acc;
}
/*
 * NOTE(review): OTHER_SIZE and other_data are also defined earlier in this
 * file, with a different size and an initializer. The pairs look like the
 * removed and added lines of a diff -- confirm which definitions are live.
 */
/* Number of passes other_workload() makes over other_data[]. */
#define OTHER_ITER 2
#define OTHER_SIZE SIZE
static volatile uint64_t other_data[OTHER_SIZE];
/*
 * Makes OTHER_ITER passes over other_data[], reading and post-incrementing
 * every STEP-th element and adding the value read to `acc`. It then adds the
 * current timing counter value to `acc` and returns it.
 *
 * The inner loop is bounded by SIZE rather than OTHER_SIZE; the two are equal
 * only while OTHER_SIZE is defined as SIZE.
 */
static int other_workload(int acc) {
for (int i = 0; i < OTHER_ITER; i++) {
for (int j = 0; j < SIZE; j += STEP) {
acc += other_data[j]++;
}
}
acc += timing_counter_get();
return acc;
}
/*
 * Benchmark loop called from thread(): repeatedly takes the lock `mxtex`,
 * times workload() while holding it, and prints statistics with printk().
 * The outer loop has `true` as its condition, so the function never returns.
 *
 * NOTE(review): this text contains two function signatures, two declarations
 * of `buff` and two of `total` in the same scope, and a summary printk()
 * whose format pieces and argument list do not line up. It reads like the
 * removed and added lines of a diff merged together -- confirm which lines
 * are live before relying on the comments below.
 */
void test(const char *thread_name)
void time_mxtex(void)
{
/* Called once, ahead of every timing_counter_get() below. */
timing_init();
timing_start();
/* One pass of this loop is one reporting round of ITER iterations. */
for (int rep = 0; true; rep++) {
/* Two back-to-back counter reads; their distance is computed as `gran`. */
timing_t start_time = timing_counter_get();
timing_t poststart_time = timing_counter_get();
/* Per-round accumulators, reset at the start of every round. */
uint32_t lock_total = 0;
uint32_t buff_total = 0;
uint32_t buff2_total = 0;
uint32_t buff_max = 0;
uint32_t buff_min = UINT32_MAX;
/* Threaded through every workload call and printed with the summary. */
unsigned int acc = 0;
for (int i = 0; i < ITER; i++) {
/*
 * lock_start_time .. lock_end_time brackets the lock call and everything
 * up to the counter read that follows the flush/invalidate calls.
 */
timing_t lock_start_time = timing_counter_get();
k_mxtex_lock(&mxtex, K_FOREVER);
/* Prepare the caches. */
acc = workload(acc);
for (int j = 0; j < OTHER_SIZE; j++) {
acc += other_data[j]++;
}
/*
 * NOTE(review): the second argument of these calls is an element count
 * (OTHER_SIZE / SIZE), not a byte count. If arch_dcache_flush() and
 * arch_dcache_invd() take a size in bytes, only part of each array is
 * covered -- confirm against their declarations.
 */
arch_dcache_flush(other_data, OTHER_SIZE);
arch_dcache_invd(other_data, OTHER_SIZE);
arch_dcache_flush(data, SIZE);
arch_dcache_invd(data, SIZE);
acc += timing_counter_get();
timing_t lock_end_time = timing_counter_get();
/* Two consecutive timed runs of workload(), both with the lock held. */
timing_t buffstart_time = timing_counter_get();
acc = workload(acc);
timing_t buffend_time = timing_counter_get();
timing_t buffstart2_time = timing_counter_get();
acc = workload(acc);
timing_t buffend2_time = timing_counter_get();
/* TODO: count on which cpu this was executed and how
* often it is different from the previous one */
k_mxtex_unlock(&mxtex);
/* Everything below runs after the lock has been released. */
uint32_t buff = timing_cycles_get(&buffstart_time, &buffend_time);
uint32_t buff2 = timing_cycles_get(&buffstart2_time, &buffend2_time);
/*
 * Per-iteration line: second run as a percentage of the first, their
 * difference, and both raw values.
 * NOTE(review): the percentage divides by `buff`; a zero reading would be
 * a division by zero.
 */
printk("%4u/100, %4d, %4u -> %4u\n", buff2 * 100 / buff, buff2 - buff, buff, buff2);
uint32_t lock_time = timing_cycles_get(&lock_start_time, &lock_end_time);
lock_total += lock_time;
uint32_t buff = timing_cycles_get(&buffstart_time, &buffend_time);
buff_total += buff;
buff2_total += buff2;
/* Track the extremes of the first timed run across the round. */
buff_max = buff > buff_max ? buff : buff_max;
buff_min = buff < buff_min ? buff : buff_min;
/* Work done without holding the lock. */
acc += other_workload(acc);
}
timing_t end_time = timing_counter_get();
uint32_t gran = timing_cycles_get(&start_time, &poststart_time);
uint32_t total = timing_cycles_get(&poststart_time, &end_time);
uint32_t total = timing_cycles_get(&start_time, &end_time);
/*
 * Round summary.
 * NOTE(review): the adjacent string literals below concatenate into one
 * format string with more conversions than one consistent argument list
 * provides (a millisecond variant and a per-iteration variant appear
 * together) -- confirm which format/argument set is intended.
 */
printk("%s, rep %d, iter %d, "
"total %d ms, gran %d ms, buff_total %d ms, buff2_total %d ms, acc %d\n",
"total %6u, "
"lock %6u "
"buff %6u - %6u - %6u, acc %u\n",
_current->name, rep, ITER,
timing_cycles_to_ns(total) / 1000000,
timing_cycles_to_ns(gran) / 1000000,
timing_cycles_to_ns(buff_total) / 1000000,
timing_cycles_to_ns(buff2_total) / 1000000,
total / ITER,
lock_total / ITER,
buff_min, buff_total / ITER, buff_max,
acc);
}
}
/*
 * Entry function shared by the K_THREAD_DEFINE() instances in this file.
 * None of the three arguments is used.
 */
void thread(void *dummy1, void *dummy2, void *dummy3)
{
ARG_UNUSED(dummy1);
ARG_UNUSED(dummy2);
ARG_UNUSED(dummy3);
/*
 * Hand over to the benchmark routine.
 * NOTE(review): both test() and time_mxtex() are called here. They look like
 * the removed and the added line of a diff -- confirm which call is live.
 */
test(__func__);
time_mxtex();
}
K_THREAD_DEFINE(thread_a, STACKSIZE, thread, NULL, NULL, NULL,
......@@ -127,11 +136,11 @@ K_THREAD_DEFINE(thread_b, STACKSIZE, thread, NULL, NULL, NULL,
PRIORITY, 0, 0);
/*
 * Statically defined worker threads. Each one uses thread() as its entry
 * function with three NULL arguments, a stack of STACKSIZE and priority
 * PRIORITY. The two trailing zeros are presumably the options and start-delay
 * arguments of K_THREAD_DEFINE -- confirm against the Zephyr API.
 *
 * NOTE(review): thread_d .. thread_g appear below both commented out and
 * enabled. This looks like the removed and added lines of a diff -- confirm
 * how many threads are meant to be defined.
 */
K_THREAD_DEFINE(thread_c, STACKSIZE, thread, NULL, NULL, NULL,
PRIORITY, 0, 0);
/* K_THREAD_DEFINE(thread_d, STACKSIZE, thread, NULL, NULL, NULL, */
/* PRIORITY, 0, 0); */
/* K_THREAD_DEFINE(thread_e, STACKSIZE, thread, NULL, NULL, NULL, */
/* PRIORITY, 0, 0); */
/* K_THREAD_DEFINE(thread_f, STACKSIZE, thread, NULL, NULL, NULL, */
/* PRIORITY, 0, 0); */
/* K_THREAD_DEFINE(thread_g, STACKSIZE, thread, NULL, NULL, NULL, */
/* PRIORITY, 0, 0); */
K_THREAD_DEFINE(thread_d, STACKSIZE, thread, NULL, NULL, NULL,
PRIORITY, 0, 0);
K_THREAD_DEFINE(thread_e, STACKSIZE, thread, NULL, NULL, NULL,
PRIORITY, 0, 0);
K_THREAD_DEFINE(thread_f, STACKSIZE, thread, NULL, NULL, NULL,
PRIORITY, 0, 0);
K_THREAD_DEFINE(thread_g, STACKSIZE, thread, NULL, NULL, NULL,
PRIORITY, 0, 0);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment