| /* |
| * Copyright (c) 2024 Intel Corporation |
| * |
| * SPDX-License-Identifier: Apache-2.0 |
| */ |
| |
| /* |
| * @file |
| * This file contains tests that will measure the length of time required |
| * to add and remove threads from a wait queue that holds a varying number of |
| * threads. Each thread added to (and removed from) the wait queue is a dummy |
| * thread. As these dummy threads are inherently non-executable, this helps |
| * prevent the addition/removal of threads to/from the ready queue from being |
| * included in these measurements. Furthermore, the use of dummy threads helps |
| * reduce the memory footprint as not only are thread stacks not required, |
| * but we also do not need the full k_thread structure for each of these |
| * dummy threads. |
| */ |
| |
| #include <zephyr/kernel.h> |
| #include <zephyr/timestamp.h> |
| #include <zephyr/timing/timing.h> |
| #include "utils.h" |
| #include <zephyr/tc_util.h> |
| #include <wait_q.h> |
| #include <ksched.h> |
| #include <stdio.h> |
| |
| uint32_t tm_off; |
| |
| static struct _thread_base dummy_thread[CONFIG_BENCHMARK_NUM_THREADS]; |
| static _wait_q_t wait_q; |
| |
| uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS]; |
| uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS]; |
| |
| /** |
| * Initialize each dummy thread. |
| */ |
| static void dummy_threads_init(unsigned int num_threads) |
| { |
| unsigned int i; |
| unsigned int bucket_size; |
| |
| bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1; |
| |
| for (i = 0; i < num_threads; i++) { |
| z_init_thread_base(&dummy_thread[i], i / bucket_size, |
| _THREAD_DUMMY, 0); |
| } |
| } |
| |
| static void cycles_reset(unsigned int num_threads) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < num_threads; i++) { |
| add_cycles[i] = 0ULL; |
| remove_cycles[i] = 0ULL; |
| } |
| } |
| |
| /** |
| * Each successive dummy thread added to the wait queue is either of the |
| * same or lower priority. Each dummy thread removed from the wait queue |
| * is of the same or lower priority than the one previous. |
| */ |
| static void test_decreasing_priority(_wait_q_t *q, unsigned int num_threads) |
| { |
| unsigned int i; |
| timing_t start; |
| timing_t finish; |
| |
| /* Add to tail of wait queue */ |
| |
| for (i = 0; i < num_threads; i++) { |
| start = timing_counter_get(); |
| z_pend_thread((struct k_thread *)&dummy_thread[i], |
| q, K_FOREVER); |
| finish = timing_counter_get(); |
| |
| add_cycles[i] += timing_cycles_get(&start, &finish); |
| } |
| |
| /* Remove from head of wait queue */ |
| |
| for (i = 0; i < num_threads; i++) { |
| start = timing_counter_get(); |
| z_unpend_thread((struct k_thread *)&dummy_thread[i]); |
| finish = timing_counter_get(); |
| |
| remove_cycles[i] += timing_cycles_get(&start, &finish); |
| } |
| } |
| |
| static void test_increasing_priority(_wait_q_t *q, unsigned int num_threads) |
| { |
| unsigned int i; |
| timing_t start; |
| timing_t finish; |
| struct k_thread *thread; |
| |
| /* Add to head of wait queue */ |
| |
| for (i = 0; i < num_threads; i++) { |
| start = timing_counter_get(); |
| thread = (struct k_thread *) |
| &dummy_thread[num_threads - i - 1]; |
| z_pend_thread(thread, q, K_FOREVER); |
| finish = timing_counter_get(); |
| |
| add_cycles[i] += timing_cycles_get(&start, &finish); |
| } |
| |
| /* Remove from tail of wait queue */ |
| |
| for (i = 0; i < num_threads; i++) { |
| start = timing_counter_get(); |
| thread = (struct k_thread *) |
| &dummy_thread[num_threads - i - 1]; |
| z_unpend_thread(thread); |
| finish = timing_counter_get(); |
| |
| remove_cycles[i] += timing_cycles_get(&start, &finish); |
| } |
| } |
| |
| |
| static uint64_t sqrt_u64(uint64_t square) |
| { |
| if (square > 1) { |
| uint64_t lo = sqrt_u64(square >> 2) << 1; |
| uint64_t hi = lo + 1; |
| |
| return ((hi * hi) > square) ? lo : hi; |
| } |
| |
| return square; |
| } |
| |
| |
| static void compute_and_report_stats(unsigned int num_threads, |
| unsigned int num_iterations, |
| uint64_t *cycles, |
| const char *str) |
| { |
| uint64_t minimum = cycles[0]; |
| uint64_t maximum = cycles[0]; |
| uint64_t total = cycles[0]; |
| uint64_t average; |
| uint64_t std_dev = 0; |
| uint64_t tmp; |
| uint64_t diff; |
| unsigned int i; |
| |
| for (i = 1; i < num_threads; i++) { |
| if (cycles[i] > maximum) { |
| maximum = cycles[i]; |
| } |
| |
| if (cycles[i] < minimum) { |
| minimum = cycles[i]; |
| } |
| |
| total += cycles[i]; |
| } |
| |
| minimum /= (uint64_t)num_iterations; |
| maximum /= (uint64_t)num_iterations; |
| average = total / (num_threads * num_iterations); |
| |
| /* Calculate standard deviation */ |
| |
| for (i = 0; i < num_threads; i++) { |
| tmp = cycles[i] / num_iterations; |
| diff = (average > tmp) ? (average - tmp) : (tmp - average); |
| |
| std_dev += (diff * diff); |
| } |
| std_dev /= num_threads; |
| std_dev = sqrt_u64(std_dev); |
| |
| printk("%s\n", str); |
| |
| printk(" Minimum : %7llu cycles (%7u nsec)\n", |
| minimum, (uint32_t)timing_cycles_to_ns(minimum)); |
| printk(" Maximum : %7llu cycles (%7u nsec)\n", |
| maximum, (uint32_t)timing_cycles_to_ns(maximum)); |
| printk(" Average : %7llu cycles (%7u nsec)\n", |
| average, (uint32_t)timing_cycles_to_ns(average)); |
| printk(" Std Deviation: %7llu cycles (%7u nsec)\n", |
| std_dev, (uint32_t)timing_cycles_to_ns(std_dev)); |
| |
| } |
| |
| int main(void) |
| { |
| unsigned int i; |
| unsigned int freq; |
| #ifdef CONFIG_BENCHMARK_VERBOSE |
| char description[120]; |
| char tag[50]; |
| struct k_thread *thread; |
| #endif |
| |
| timing_init(); |
| |
| bench_test_init(); |
| |
| freq = timing_freq_get_mhz(); |
| |
| printk("Time Measurements for %s wait queues\n", |
| IS_ENABLED(CONFIG_WAITQ_DUMB) ? "dumb" : "scalable"); |
| printk("Timing results: Clock frequency: %u MHz\n", freq); |
| |
| z_waitq_init(&wait_q); |
| |
| dummy_threads_init(CONFIG_BENCHMARK_NUM_THREADS); |
| |
| timing_start(); |
| |
| cycles_reset(CONFIG_BENCHMARK_NUM_THREADS); |
| |
| for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) { |
| test_decreasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS); |
| } |
| |
| compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, |
| CONFIG_BENCHMARK_NUM_ITERATIONS, |
| add_cycles, |
| "Add threads of decreasing priority"); |
| |
| #ifdef CONFIG_BENCHMARK_VERBOSE |
| for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { |
| snprintf(tag, sizeof(tag), |
| "WaitQ.add.to.tail.%04u.waiters", i); |
| snprintf(description, sizeof(description), |
| "%-40s - Add thread of priority %u", |
| tag, dummy_thread[i].prio); |
| PRINT_STATS_AVG(description, (uint32_t)add_cycles[i], |
| CONFIG_BENCHMARK_NUM_ITERATIONS); |
| } |
| #endif |
| |
| printk("------------------------------------\n"); |
| |
| compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, |
| CONFIG_BENCHMARK_NUM_ITERATIONS, |
| remove_cycles, |
| "Remove threads of decreasing priority"); |
| |
| #ifdef CONFIG_BENCHMARK_VERBOSE |
| for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { |
| snprintf(tag, sizeof(tag), |
| "WaitQ.remove.from.head.%04u.waiters", |
| CONFIG_BENCHMARK_NUM_THREADS - i); |
| snprintf(description, sizeof(description), |
| "%-40s - Remove thread of priority %u", |
| tag, dummy_thread[i].prio); |
| PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i], |
| CONFIG_BENCHMARK_NUM_ITERATIONS); |
| } |
| #endif |
| |
| printk("------------------------------------\n"); |
| |
| cycles_reset(CONFIG_BENCHMARK_NUM_THREADS); |
| |
| for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) { |
| test_increasing_priority(&wait_q, CONFIG_BENCHMARK_NUM_THREADS); |
| } |
| |
| compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, |
| CONFIG_BENCHMARK_NUM_ITERATIONS, |
| add_cycles, |
| "Add threads of increasing priority"); |
| |
| #ifdef CONFIG_BENCHMARK_VERBOSE |
| for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { |
| snprintf(tag, sizeof(tag), |
| "WaitQ.add.to.head.%04u.waiters", i); |
| thread = (struct k_thread *) |
| &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1]; |
| snprintf(description, sizeof(description), |
| "%-40s - Add priority %u to waitq", |
| tag, thread->base.prio); |
| PRINT_STATS_AVG(description, (uint32_t)add_cycles[i], |
| CONFIG_BENCHMARK_NUM_ITERATIONS); |
| } |
| #endif |
| |
| printk("------------------------------------\n"); |
| |
| compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, |
| CONFIG_BENCHMARK_NUM_ITERATIONS, |
| remove_cycles, |
| "Remove threads of increasing priority"); |
| |
| #ifdef CONFIG_BENCHMARK_VERBOSE |
| for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) { |
| snprintf(tag, sizeof(tag), |
| "WaitQ.remove.from.tail.%04u.waiters", |
| CONFIG_BENCHMARK_NUM_THREADS - i); |
| thread = (struct k_thread *) |
| &dummy_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1]; |
| snprintf(description, sizeof(description), |
| "%-40s - Remove priority %u from waitq", |
| tag, thread->base.prio); |
| PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i], |
| CONFIG_BENCHMARK_NUM_ITERATIONS); |
| } |
| #endif |
| |
| timing_stop(); |
| |
| TC_END_REPORT(0); |
| |
| return 0; |
| } |