| /* |
| * Copyright (c) 2019 Intel Corporation |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| */ |
| |
| #include <zephyr/kernel.h> |
| #include <zephyr/sys/printk.h> |
| #include <zephyr/wait_q.h> |
| #include <ksched.h> |
| |
| /* This is a scheduler microbenchmark, designed to measure latencies |
| * of specific low level scheduling primitives independent of overhead |
| * from application or API abstractions. It works very simply: a main |
| * thread creates a "partner" thread at a higher priority, the partner |
| * then sleeps using z_pend_curr_irqlock(). From this initial |
| * state: |
| * |
| * 1. The main thread calls z_unpend_first_thread() |
| * 2. The main thread calls z_ready_thread() |
| * 3. The main thread calls k_yield() |
| * (the kernel switches to the partner thread) |
| * 4. The partner thread then runs and calls z_pend_curr_irqlock() again |
| * (the kernel switches to the main thread) |
| * 5. The main thread returns from k_yield() |
| * |
| * It then iterates this many times, reporting timestamp latencies |
| * between each numbered step and for the whole cycle, and a running |
| * average for all cycles run. |
| */ |
| |
| #define N_RUNS 1000 |
| #define N_SETTLE 10 |
| |
| |
| static K_THREAD_STACK_DEFINE(partner_stack, 1024); |
| static struct k_thread partner_thread; |
| |
| _wait_q_t waitq; |
| |
| enum { |
| UNPENDING, |
| UNPENDED_READYING, |
| READIED_YIELDING, |
| PARTNER_AWAKE_PENDING, |
| YIELDED, |
| NUM_STAMP_STATES |
| }; |
| |
| uint32_t stamps[NUM_STAMP_STATES]; |
| |
| static inline int _stamp(int state) |
| { |
| uint32_t t; |
| |
| /* In theory the TSC has much lower overhead and higher |
| * precision. In practice it's VERY jittery in recent qemu |
| * versions and frankly too noisy to trust. |
| */ |
| #ifdef CONFIG_X86 |
| __asm__ volatile("rdtsc" : "=a"(t) : : "edx"); |
| #else |
| t = k_cycle_get_32(); |
| #endif |
| |
| stamps[state] = t; |
| return t; |
| } |
| |
| /* #define stamp(s) printk("%s @ %d\n", #s, _stamp(s)) */ |
| #define stamp(s) _stamp(s) |
| |
| static void partner_fn(void *arg1, void *arg2, void *arg3) |
| { |
| ARG_UNUSED(arg1); |
| ARG_UNUSED(arg2); |
| ARG_UNUSED(arg3); |
| |
| printk("Running %p\n", k_current_get()); |
| |
| while (true) { |
| unsigned int key = irq_lock(); |
| |
| z_pend_curr_irqlock(key, &waitq, K_FOREVER); |
| stamp(PARTNER_AWAKE_PENDING); |
| } |
| } |
| |
| int main(void) |
| { |
| z_waitq_init(&waitq); |
| |
| int main_prio = k_thread_priority_get(k_current_get()); |
| int partner_prio = main_prio - 1; |
| |
| k_tid_t th = k_thread_create(&partner_thread, partner_stack, |
| K_THREAD_STACK_SIZEOF(partner_stack), |
| partner_fn, NULL, NULL, NULL, |
| partner_prio, 0, K_NO_WAIT); |
| |
| /* Let it start running and pend */ |
| k_sleep(K_MSEC(100)); |
| |
| uint64_t tot = 0U; |
| uint32_t runs = 0U; |
| |
| for (int i = 0; i < N_RUNS + N_SETTLE; i++) { |
| stamp(UNPENDING); |
| z_unpend_first_thread(&waitq); |
| stamp(UNPENDED_READYING); |
| z_ready_thread(th); |
| stamp(READIED_YIELDING); |
| |
| /* z_ready_thread() does not reschedule, so this is |
| * guaranteed to be the point where we will yield to |
| * the new thread, which (being higher priority) will |
| * run immediately, and we'll wake up synchronously as |
| * soon as it pends. |
| */ |
| k_yield(); |
| stamp(YIELDED); |
| |
| uint32_t avg, whole = stamps[4] - stamps[0]; |
| |
| if (++runs > N_SETTLE) { |
| /* Only compute averages after the first ~10 |
| * runs to let performance settle, cache |
| * effects in the host pollute the early |
| * data |
| */ |
| tot += whole; |
| avg = tot / (runs - 10); |
| } else { |
| tot = 0U; |
| avg = 0U; |
| } |
| |
| /* For reference, an unmodified HEAD on qemu_x86 with |
| * !USERSPACE and SCHED_DUMB and using -icount |
| * shift=0,sleep=off,align=off, I get results of: |
| * |
| * unpend 132 ready 257 switch 278 pend 321 tot 988 (avg 900) |
| */ |
| printk("unpend %4d ready %4d switch %4d pend %4d tot %4d (avg %4d)\n", |
| stamps[1] - stamps[0], |
| stamps[2] - stamps[1], |
| stamps[3] - stamps[2], |
| stamps[4] - stamps[3], |
| whole, avg); |
| } |
| printk("fin\n"); |
| return 0; |
| } |