kernel/arch: enhance the "ready thread" cache
The way the ready thread cache was implemented caused it to not always
be "hot", i.e. there could be some misses, which happened when the
cached thread was taken out of the ready queue. When that happened, the
cache was not refilled immediately, since doing so could be wasted
work: the flow could be interrupted and another thread could take the
replacement's place before it ever got to run. This was the more
conservative approach, which ensured that moving a thread into the
cache was never wasted effort.
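Roughly, the old scheme looked like this (a condensed sketch drawn from
the code being removed in the diff below; surrounding types are elided
and the wrapper name used for the remove path is illustrative only):

    /* removing the cached thread left the cache cold (NULL) */
    static void _clear_ready_q_cache(struct k_thread *thread)
    {
        struct k_thread **cache = &_ready_q.cache;

        *cache = (*cache == thread) ? NULL : *cache;
    }

    /* the lookup then had to fall back to scanning the ready queue in C */
    struct k_thread *_get_next_ready_thread(void)
    {
        struct k_thread *cache = _ready_q.cache;

        return cache ? cache : __get_next_ready_thread();
    }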
However, this caused two problems:
1. The cache could not be refilled until another thread context-switched
in, since there was no thread in the cache to compare priorities
against.
2. Interrupt exit code would always have to call into C to find which
thread to run when the current thread was not cooperative and did not
have the scheduler locked. Furthermore, it was possible for this code
path to encounter a cold cache, in which case it had to find the next
thread to run the long way.
To fix this, filling the cache is now more aggressive, i.e. the next
thread to put in the cache is found even when the currently cached
thread is context-switched out. This ensures the interrupt exit code is
much faster on the slow path. In addition, since finding the next thread
to run is now always "get it from the cache", which is a simple fetch
from memory (_kernel.ready_q.cache), there is no need to call into the
more complex C code.
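In a nutshell, the new scheme keeps the cache hot at all times (again a
condensed sketch mirroring the code added in the diff below):

    /* inside the ready-queue remove path: refill the cache eagerly from
     * the head of the highest-priority non-empty queue instead of
     * leaving it cold
     */
    struct k_thread **cache = &_ready_q.cache;

    *cache = (*cache == thread) ? _get_ready_q_head() : *cache;

    /* ... so finding the next thread to run is a plain memory load,
     * cheap enough to inline and to use directly from interrupt exit
     */
    static ALWAYS_INLINE struct k_thread *_get_next_ready_thread(void)
    {
        return _ready_q.cache;
    }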
On the ARM FRDM-K64F board, this improvement is seen:
Before:
1- Measure time to switch from ISR back to interrupted task
switching time is 215 tcs = 1791 nsec
2- Measure time from ISR to executing a different task (rescheduled)
switch time is 315 tcs = 2625 nsec
After:
1- Measure time to switch from ISR back to interrupted task
switching time is 130 tcs = 1083 nsec
2- Measure time from ISR to executing a different task (rescheduled)
switch time is 225 tcs = 1875 nsec
These are the most dramatic improvements, but most of the numbers
generated by the latency_measure test are improved.
Fixes ZEP-1401.
Change-Id: I2eaac147048b1ec71a93bd0a285e743a39533973
Signed-off-by: Benjamin Walsh <benjamin.walsh@windriver.com>
diff --git a/kernel/unified/include/kernel_offsets.h b/kernel/unified/include/kernel_offsets.h
index e18228c..30c41b4 100644
--- a/kernel/unified/include/kernel_offsets.h
+++ b/kernel/unified/include/kernel_offsets.h
@@ -37,6 +37,9 @@
GEN_OFFSET_SYM(_kernel_t, idle);
#endif
+GEN_OFFSET_SYM(_kernel_t, ready_q);
+GEN_OFFSET_SYM(_ready_q_t, cache);
+
#ifdef CONFIG_FP_SHARING
GEN_OFFSET_SYM(_kernel_t, current_fp);
#endif
diff --git a/kernel/unified/include/kernel_structs.h b/kernel/unified/include/kernel_structs.h
index 8ec5d99..c82fc61 100644
--- a/kernel/unified/include/kernel_structs.h
+++ b/kernel/unified/include/kernel_structs.h
@@ -146,6 +146,8 @@
sys_dlist_t q[K_NUM_PRIORITIES];
};
+typedef struct _ready_q _ready_q_t;
+
struct _kernel {
/* nested interrupt count */
diff --git a/kernel/unified/include/ksched.h b/kernel/unified/include/ksched.h
index 08dab07..6e9fe90 100644
--- a/kernel/unified/include/ksched.h
+++ b/kernel/unified/include/ksched.h
@@ -30,11 +30,17 @@
_wait_q_t *wait_q, int32_t timeout);
extern void _pend_current_thread(_wait_q_t *wait_q, int32_t timeout);
extern void _move_thread_to_end_of_prio_q(struct k_thread *thread);
-extern struct k_thread *_get_next_ready_thread(void);
extern int __must_switch_threads(void);
extern int32_t _ms_to_ticks(int32_t ms);
extern void idle(void *, void *, void *);
+/* find which one is the next thread to run */
+/* must be called with interrupts locked */
+static ALWAYS_INLINE struct k_thread *_get_next_ready_thread(void)
+{
+ return _ready_q.cache;
+}
+
static inline int _is_idle_thread(void *entry_point)
{
return entry_point == idle;
diff --git a/kernel/unified/include/offsets_short.h b/kernel/unified/include/offsets_short.h
index dbfb1f5a..3291864 100644
--- a/kernel/unified/include/offsets_short.h
+++ b/kernel/unified/include/offsets_short.h
@@ -39,6 +39,9 @@
#define _kernel_offset_to_current_fp \
(___kernel_t_current_fp_OFFSET)
+#define _kernel_offset_to_ready_q_cache \
+ (___kernel_t_ready_q_OFFSET + ___ready_q_t_cache_OFFSET)
+
/* end - kernel */
/* threads */
diff --git a/kernel/unified/init.c b/kernel/unified/init.c
index 70bf60f..8f8ae5a 100644
--- a/kernel/unified/init.c
+++ b/kernel/unified/init.c
@@ -265,6 +265,17 @@
sys_dlist_init(&_ready_q.q[ii]);
}
+ /*
+ * prime the cache with the main thread since:
+ *
+ * - the cache can never be NULL
+ * - the main thread will be the one to run first
+ * - no other threads are initialized yet, and thus their priority
+ *   fields contain garbage, which would prevent the cache loading
+ *   algorithm from working as intended
+ */
+ _ready_q.cache = _main_thread;
+
_new_thread(_main_stack, MAIN_STACK_SIZE,
_main, NULL, NULL, NULL,
CONFIG_MAIN_THREAD_PRIORITY, K_ESSENTIAL);
diff --git a/kernel/unified/sched.c b/kernel/unified/sched.c
index 9efc18a..c78be4e 100644
--- a/kernel/unified/sched.c
+++ b/kernel/unified/sched.c
@@ -42,6 +42,26 @@
}
/*
+ * Find the next thread to run when there is no thread in the cache and update
+ * the cache.
+ */
+static struct k_thread *_get_ready_q_head(void)
+{
+ int prio = _get_highest_ready_prio();
+ int q_index = _get_ready_q_q_index(prio);
+ sys_dlist_t *list = &_ready_q.q[q_index];
+
+ __ASSERT(!sys_dlist_is_empty(list),
+ "no thread to run (prio: %d, queue index: %u)!\n",
+ prio, q_index);
+
+ struct k_thread *thread =
+ (struct k_thread *)sys_dlist_peek_head_not_empty(list);
+
+ return thread;
+}
+
+/*
* Add thread to the ready queue, in the slot for its priority; the thread
* must not be on a wait queue.
*
@@ -61,9 +81,7 @@
struct k_thread **cache = &_ready_q.cache;
- *cache = *cache && _is_prio_higher(thread->base.prio,
- (*cache)->base.prio) ?
- thread : *cache;
+ *cache = _is_t1_higher_prio_than_t2(thread, *cache) ? thread : *cache;
}
/*
@@ -85,7 +103,7 @@
struct k_thread **cache = &_ready_q.cache;
- *cache = *cache == thread ? NULL : *cache;
+ *cache = *cache == thread ? _get_ready_q_head() : *cache;
}
/* reschedule threads if the scheduler is not locked */
@@ -183,37 +201,6 @@
}
/*
- * Find the next thread to run when there is no thread in the cache and update
- * the cache.
- */
-static struct k_thread *__get_next_ready_thread(void)
-{
- int prio = _get_highest_ready_prio();
- int q_index = _get_ready_q_q_index(prio);
- sys_dlist_t *list = &_ready_q.q[q_index];
-
- __ASSERT(!sys_dlist_is_empty(list),
- "no thread to run (prio: %d, queue index: %u)!\n",
- prio, q_index);
-
- struct k_thread *thread =
- (struct k_thread *)sys_dlist_peek_head_not_empty(list);
-
- _ready_q.cache = thread;
-
- return thread;
-}
-
-/* find which one is the next thread to run */
-/* must be called with interrupts locked */
-struct k_thread *_get_next_ready_thread(void)
-{
- struct k_thread *cache = _ready_q.cache;
-
- return cache ? cache : __get_next_ready_thread();
-}
-
-/*
* Check if there is a thread of higher prio than the current one. Should only
* be called if we already know that the current thread is preemptible.
*/
@@ -228,11 +215,6 @@
return _is_prio_higher(_get_highest_ready_prio(), _current->base.prio);
}
-int _is_next_thread_current(void)
-{
- return _get_next_ready_thread() == _current;
-}
-
int k_thread_priority_get(k_tid_t thread)
{
return thread->base.prio;
@@ -275,7 +257,7 @@
struct k_thread **cache = &_ready_q.cache;
- *cache = *cache == thread ? NULL : *cache;
+ *cache = *cache == thread ? _get_ready_q_head() : *cache;
}
void k_yield(void)