kernel/arch: enhance the "ready thread" cache
The way the ready thread cache was implemented caused it to not always
be "hot", i.e. there could be some misses, which happened when the
cached thread was taken out of the ready queue. When that happened, the
cache was not refilled immediately, since doing so could be wasted
work: the flow could be interrupted and another thread could take the
replacement's place before it ever got to run. This was the more
conservative approach, which ensured that moving a thread into the
cache was never wasted effort.
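Roughly, the old scheme looked like this (a condensed sketch drawn from
the code being removed in the diff below; surrounding types are elided
and the wrapper name used for the remove path is illustrative only):

    /* removing the cached thread left the cache cold (NULL) */
    static void _clear_ready_q_cache(struct k_thread *thread)
    {
        struct k_thread **cache = &_ready_q.cache;

        *cache = (*cache == thread) ? NULL : *cache;
    }

    /* the lookup then had to fall back to scanning the ready queue in C */
    struct k_thread *_get_next_ready_thread(void)
    {
        struct k_thread *cache = _ready_q.cache;

        return cache ? cache : __get_next_ready_thread();
    }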
However, this caused two problems:
1. The cache could not be refilled until another thread context-switched
in, since there was no thread in the cache to compare priorities
against.
2. Interrupt exit code would always have to call into C to find which
thread to run when the current thread was not cooperative and did not
have the scheduler locked. Furthermore, it was possible for this code
path to encounter a cold cache, in which case it had to find the next
thread to run the long way.
To fix this, filling the cache is now more aggressive, i.e. the next
thread to put in the cache is found even when the currently cached
thread is context-switched out. This ensures the interrupt exit code is
much faster on the slow path. In addition, since finding the next thread
to run is now always "get it from the cache", which is a simple fetch
from memory (_kernel.ready_q.cache), there is no need to call into the
more complex C code.
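In a nutshell, the new scheme keeps the cache hot at all times (again a
condensed sketch mirroring the code added in the diff below):

    /* inside the ready-queue remove path: refill the cache eagerly from
     * the head of the highest-priority non-empty queue instead of
     * leaving it cold
     */
    struct k_thread **cache = &_ready_q.cache;

    *cache = (*cache == thread) ? _get_ready_q_head() : *cache;

    /* ... so finding the next thread to run is a plain memory load,
     * cheap enough to inline and to use directly from interrupt exit
     */
    static ALWAYS_INLINE struct k_thread *_get_next_ready_thread(void)
    {
        return _ready_q.cache;
    }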
On the ARM FRDM-K64F board, this improvement is seen:
Before:
1- Measure time to switch from ISR back to interrupted task
switching time is 215 tcs = 1791 nsec
2- Measure time from ISR to executing a different task (rescheduled)
switch time is 315 tcs = 2625 nsec
After:
1- Measure time to switch from ISR back to interrupted task
switching time is 130 tcs = 1083 nsec
2- Measure time from ISR to executing a different task (rescheduled)
switch time is 225 tcs = 1875 nsec
These are the most dramatic improvements, but most of the numbers
generated by the latency_measure test are improved.
Fixes ZEP-1401.
Change-Id: I2eaac147048b1ec71a93bd0a285e743a39533973
Signed-off-by: Benjamin Walsh <benjamin.walsh@windriver.com>
diff --git a/kernel/unified/include/kernel_offsets.h b/kernel/unified/include/kernel_offsets.h
index e18228c..30c41b4 100644
--- a/kernel/unified/include/kernel_offsets.h
+++ b/kernel/unified/include/kernel_offsets.h
@@ -37,6 +37,9 @@
GEN_OFFSET_SYM(_kernel_t, idle);
#endif
+GEN_OFFSET_SYM(_kernel_t, ready_q);
+GEN_OFFSET_SYM(_ready_q_t, cache);
+
#ifdef CONFIG_FP_SHARING
GEN_OFFSET_SYM(_kernel_t, current_fp);
#endif
diff --git a/kernel/unified/include/kernel_structs.h b/kernel/unified/include/kernel_structs.h
index 8ec5d99..c82fc61 100644
--- a/kernel/unified/include/kernel_structs.h
+++ b/kernel/unified/include/kernel_structs.h
@@ -146,6 +146,8 @@
sys_dlist_t q[K_NUM_PRIORITIES];
};
+typedef struct _ready_q _ready_q_t;
+
struct _kernel {
/* nested interrupt count */
diff --git a/kernel/unified/include/ksched.h b/kernel/unified/include/ksched.h
index 08dab07..6e9fe90 100644
--- a/kernel/unified/include/ksched.h
+++ b/kernel/unified/include/ksched.h
@@ -30,11 +30,17 @@
_wait_q_t *wait_q, int32_t timeout);
extern void _pend_current_thread(_wait_q_t *wait_q, int32_t timeout);
extern void _move_thread_to_end_of_prio_q(struct k_thread *thread);
-extern struct k_thread *_get_next_ready_thread(void);
extern int __must_switch_threads(void);
extern int32_t _ms_to_ticks(int32_t ms);
extern void idle(void *, void *, void *);
+/* find which one is the next thread to run */
+/* must be called with interrupts locked */
+static ALWAYS_INLINE struct k_thread *_get_next_ready_thread(void)
+{
+ return _ready_q.cache;
+}
+
static inline int _is_idle_thread(void *entry_point)
{
return entry_point == idle;
diff --git a/kernel/unified/include/offsets_short.h b/kernel/unified/include/offsets_short.h
index dbfb1f5a..3291864 100644
--- a/kernel/unified/include/offsets_short.h
+++ b/kernel/unified/include/offsets_short.h
@@ -39,6 +39,9 @@
#define _kernel_offset_to_current_fp \
(___kernel_t_current_fp_OFFSET)
+#define _kernel_offset_to_ready_q_cache \
+ (___kernel_t_ready_q_OFFSET + ___ready_q_t_cache_OFFSET)
+
/* end - kernel */
/* threads */
diff --git a/kernel/unified/init.c b/kernel/unified/init.c
index 70bf60f..8f8ae5a 100644
--- a/kernel/unified/init.c
+++ b/kernel/unified/init.c
@@ -265,6 +265,17 @@
sys_dlist_init(&_ready_q.q[ii]);
}
+ /*
+ * prime the cache with the main thread since:
+ *
+ * - the cache can never be NULL
+ * - the main thread will be the one to run first
+ * - no other threads are initialized yet, and thus their priority
+ *   fields contain garbage, which would prevent the cache loading
+ *   algorithm from working as intended
+ */
+ _ready_q.cache = _main_thread;
+
_new_thread(_main_stack, MAIN_STACK_SIZE,
_main, NULL, NULL, NULL,
CONFIG_MAIN_THREAD_PRIORITY, K_ESSENTIAL);
diff --git a/kernel/unified/sched.c b/kernel/unified/sched.c
index 9efc18a..c78be4e 100644
--- a/kernel/unified/sched.c
+++ b/kernel/unified/sched.c
@@ -42,6 +42,26 @@
}
/*
+ * Find the next thread to run when there is no thread in the cache and update
+ * the cache.
+ */
+static struct k_thread *_get_ready_q_head(void)
+{
+ int prio = _get_highest_ready_prio();
+ int q_index = _get_ready_q_q_index(prio);
+ sys_dlist_t *list = &_ready_q.q[q_index];
+
+ __ASSERT(!sys_dlist_is_empty(list),
+ "no thread to run (prio: %d, queue index: %u)!\n",
+ prio, q_index);
+
+ struct k_thread *thread =
+ (struct k_thread *)sys_dlist_peek_head_not_empty(list);
+
+ return thread;
+}
+
+/*
* Add thread to the ready queue, in the slot for its priority; the thread
* must not be on a wait queue.
*
@@ -61,9 +81,7 @@
struct k_thread **cache = &_ready_q.cache;
- *cache = *cache && _is_prio_higher(thread->base.prio,
- (*cache)->base.prio) ?
- thread : *cache;
+ *cache = _is_t1_higher_prio_than_t2(thread, *cache) ? thread : *cache;
}
/*
@@ -85,7 +103,7 @@
struct k_thread **cache = &_ready_q.cache;
- *cache = *cache == thread ? NULL : *cache;
+ *cache = *cache == thread ? _get_ready_q_head() : *cache;
}
/* reschedule threads if the scheduler is not locked */
@@ -183,37 +201,6 @@
}
/*
- * Find the next thread to run when there is no thread in the cache and update
- * the cache.
- */
-static struct k_thread *__get_next_ready_thread(void)
-{
- int prio = _get_highest_ready_prio();
- int q_index = _get_ready_q_q_index(prio);
- sys_dlist_t *list = &_ready_q.q[q_index];
-
- __ASSERT(!sys_dlist_is_empty(list),
- "no thread to run (prio: %d, queue index: %u)!\n",
- prio, q_index);
-
- struct k_thread *thread =
- (struct k_thread *)sys_dlist_peek_head_not_empty(list);
-
- _ready_q.cache = thread;
-
- return thread;
-}
-
-/* find which one is the next thread to run */
-/* must be called with interrupts locked */
-struct k_thread *_get_next_ready_thread(void)
-{
- struct k_thread *cache = _ready_q.cache;
-
- return cache ? cache : __get_next_ready_thread();
-}
-
-/*
* Check if there is a thread of higher prio than the current one. Should only
* be called if we already know that the current thread is preemptible.
*/
@@ -228,11 +215,6 @@
return _is_prio_higher(_get_highest_ready_prio(), _current->base.prio);
}
-int _is_next_thread_current(void)
-{
- return _get_next_ready_thread() == _current;
-}
-
int k_thread_priority_get(k_tid_t thread)
{
return thread->base.prio;
@@ -275,7 +257,7 @@
struct k_thread **cache = &_ready_q.cache;
- *cache = *cache == thread ? NULL : *cache;
+ *cache = *cache == thread ? _get_ready_q_head() : *cache;
}
void k_yield(void)