mbs: Save 5% total runtime and time in critical section

92495ae7 · Luis Gerhorst · 62e4e05b · 92495ae7 · 92495ae7 · 92495ae7
Commit 92495ae7 authored 4 years ago by Luis Gerhorst
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -813,7 +813,6 @@ config MBS
 config MBS_NUM_CPUS
 	int "Number of MBS CPUs/cores"
 	default 0
-	range 0 4
 	help
 	  Number of multiprocessing-capable cores available to the
 	  migration-based synchronization features.

--- a/samples/mbtex_count/prj.conf
+++ b/samples/mbtex_count/prj.conf
 CONFIG_STDOUT_CONSOLE=y
-# enable to use thread names
 CONFIG_THREAD_NAME=y
+CONFIG_TIMING_FUNCTIONS=y

 # Enable migration-based synchronization
-CONFIG_MBS=y
-CONFIG_MBS_NUM_CPUS=1
-
-CONFIG_TIMING_FUNCTIONS=y
+CONFIG_MBS=n
+CONFIG_MBS_NUM_CPUS=0
--- a/samples/mbtex_count/src/main.c
+++ b/samples/mbtex_count/src/main.c
@@ -15,12 +15,17 @@
 /* scheduling priority used by each thread */
 #define PRIORITY 7

-#define ITER (1000*10)
-
+#define ITER (1000)

+#ifdef CONFIG_MBS
+#define K_MXTEX_DEFINE K_MBTEX_DEFINE
+#define k_mxtex_lock k_mbtex_lock
+#define k_mxtex_unlock k_mbtex_unlock
+#else
 #define K_MXTEX_DEFINE K_MUTEX_DEFINE
 #define k_mxtex_lock k_mutex_lock
 #define k_mxtex_unlock k_mutex_unlock
+#endif

 K_MXTEX_DEFINE(mxtex);

@@ -28,97 +33,101 @@ K_MXTEX_DEFINE(mxtex);
 * and 64KiB) and 128KB-512KB L2 cache primarily used by the GPU (??, see BCM2835
 * Datasheet). */

-#define L1_CACHE_SIZE (1024*256)
+#define L1_CACHE_SIZE (1024*128)
 #define L1_CACHE_SET_ASSOC 4
 #define	L1_CACHELINE_SIZE (32)

 #define L1_NR_CACHELINES (L1_CACHE_SIZE / L1_CACHELINE_SIZE)

 #define GRAN sizeof(uint32_t)
-#define OTHER_SIZE ((1024*512) / GRAN)
 #define SIZE ((L1_CACHE_SIZE) / GRAN)
 #define STEP ((L1_CACHELINE_SIZE) / GRAN)

-static volatile uint64_t other_data[OTHER_SIZE] = {};
 static volatile uint64_t data[SIZE] = {};

 static int workload(int acc) {
 #pragma clang loop unroll(64)
 	for (int j = 0; j < SIZE; j += STEP) {
-		acc += data[j];
+		acc += data[j]++;
+	}
+	return acc;
+}
+
+#define OTHER_ITER 2
+#define OTHER_SIZE SIZE
+static volatile uint64_t other_data[OTHER_SIZE];
+
+static int other_workload(int acc) {
+	for (int i = 0; i < OTHER_ITER; i++) {
+		for (int j = 0; j < SIZE; j += STEP) {
+			acc += other_data[j]++;
+		}
 	}
+
+	acc += timing_counter_get();
 	return acc;
 }

-void test(const char *thread_name)
+void time_mxtex(void)
 {
 	timing_init();
 	timing_start();

 	for (int rep = 0; true; rep++) {
 		timing_t start_time = timing_counter_get();
-		timing_t poststart_time = timing_counter_get();
+		uint32_t lock_total = 0;
 		uint32_t buff_total = 0;
-		uint32_t buff2_total = 0;
+		uint32_t buff_max = 0;
+		uint32_t buff_min = UINT32_MAX;
+
 		unsigned int acc = 0;

 		for (int i = 0; i < ITER; i++) {
+			timing_t lock_start_time = timing_counter_get();
 			k_mxtex_lock(&mxtex, K_FOREVER);
-
-			/* Prepare the caches. */
-			acc = workload(acc);
-			for (int j = 0; j < OTHER_SIZE; j++) {
-				acc += other_data[j]++;
-			}
-			arch_dcache_flush(other_data, OTHER_SIZE);
-			arch_dcache_invd(other_data, OTHER_SIZE);
-			arch_dcache_flush(data, SIZE);
-			arch_dcache_invd(data, SIZE);
-			acc += timing_counter_get();
+			timing_t lock_end_time = timing_counter_get();

 			timing_t buffstart_time = timing_counter_get();
 			acc = workload(acc);
 			timing_t buffend_time = timing_counter_get();

-			timing_t buffstart2_time = timing_counter_get();
-			acc = workload(acc);
-			timing_t buffend2_time = timing_counter_get();
+			/* TODO: record which CPU this iteration executed on and
+			 * how often that differs from the previous iteration's CPU */

 			k_mxtex_unlock(&mxtex);

-			uint32_t buff = timing_cycles_get(&buffstart_time, &buffend_time);
-			uint32_t buff2 = timing_cycles_get(&buffstart2_time, &buffend2_time);
-			printk("%4u/100, %4d, %4u -> %4u\n", buff2 * 100 / buff, buff2 - buff, buff, buff2);
+			uint32_t lock_time = timing_cycles_get(&lock_start_time, &lock_end_time);
+			lock_total += lock_time;

+			uint32_t buff = timing_cycles_get(&buffstart_time, &buffend_time);
 			buff_total += buff;
-			buff2_total += buff2;
+			buff_max = buff > buff_max ? buff : buff_max;
+			buff_min = buff < buff_min ? buff : buff_min;
+
+			acc += other_workload(acc);
 		}
 		timing_t end_time = timing_counter_get();

-		uint32_t gran = timing_cycles_get(&start_time, &poststart_time);
-		uint32_t total = timing_cycles_get(&poststart_time, &end_time);
-
+		uint32_t total = timing_cycles_get(&start_time, &end_time);
 		printk("%s, rep %d, iter %d, "
-		       "total %d ms, gran %d ms, buff_total %d ms, buff2_total %d ms, acc %d\n",
+		       "total %6u, "
+		       "lock %6u "
+		       "buff %6u - %6u - %6u, acc %u\n",
 		       _current->name, rep, ITER,
-		       timing_cycles_to_ns(total) / 1000000,
-		       timing_cycles_to_ns(gran) / 1000000,
-		       timing_cycles_to_ns(buff_total) / 1000000,
-		       timing_cycles_to_ns(buff2_total) / 1000000,
+		       total / ITER,
+		       lock_total / ITER,
+		       buff_min, buff_total / ITER, buff_max,
 		       acc);
 	}
 }

-/* threadB is a dynamic thread that is spawned by threadA */
-
 void thread(void *dummy1, void *dummy2, void *dummy3)
 {
 	ARG_UNUSED(dummy1);
 	ARG_UNUSED(dummy2);
 	ARG_UNUSED(dummy3);

-	/* invoke routine to ping-pong hello messages with threadA */
-	test(__func__);
+	time_mxtex();
 }

 K_THREAD_DEFINE(thread_a, STACKSIZE, thread, NULL, NULL, NULL,
@@ -127,11 +136,11 @@ K_THREAD_DEFINE(thread_b, STACKSIZE, thread, NULL, NULL, NULL,
 		PRIORITY, 0, 0);
 K_THREAD_DEFINE(thread_c, STACKSIZE, thread, NULL, NULL, NULL,
 		PRIORITY, 0, 0);
-/* K_THREAD_DEFINE(thread_d, STACKSIZE, thread, NULL, NULL, NULL, */
-/* 		PRIORITY, 0, 0); */
-/* K_THREAD_DEFINE(thread_e, STACKSIZE, thread, NULL, NULL, NULL, */
-/* 		PRIORITY, 0, 0); */
-/* K_THREAD_DEFINE(thread_f, STACKSIZE, thread, NULL, NULL, NULL, */
-/* 		PRIORITY, 0, 0); */
-/* K_THREAD_DEFINE(thread_g, STACKSIZE, thread, NULL, NULL, NULL, */
-/* 		PRIORITY, 0, 0); */
+K_THREAD_DEFINE(thread_d, STACKSIZE, thread, NULL, NULL, NULL,
+		PRIORITY, 0, 0);
+K_THREAD_DEFINE(thread_e, STACKSIZE, thread, NULL, NULL, NULL,
+		PRIORITY, 0, 0);
+K_THREAD_DEFINE(thread_f, STACKSIZE, thread, NULL, NULL, NULL,
+		PRIORITY, 0, 0);
+K_THREAD_DEFINE(thread_g, STACKSIZE, thread, NULL, NULL, NULL,
+		PRIORITY, 0, 0);