blob: b71b65e7c3375857ee4f0e373c3ae3a630afd4bb [file] [log] [blame]
/*
* Copyright (c) 2024 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
* @file
* This file contains the main testing module that invokes all the tests.
*/
#include <zephyr/kernel.h>
#include <zephyr/timestamp.h>
#include "utils.h"
#include <zephyr/tc_util.h>
#include <ksched.h>
/* Stack size used when creating each test thread (see start_threads()). */
#define TEST_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
/* Stack size used when creating each busy thread (see start_threads()). */
#define BUSY_STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE)
/*
 * NOTE(review): tm_off is not referenced anywhere in this file; presumably it
 * is consumed by the shared benchmark timing helpers -- confirm.
 */
uint32_t tm_off;
/*
 * Warning! Most of the created threads in this test use the same stack!
 * This is done to reduce the memory footprint as having unique stacks
 * for hundreds or thousands of threads would require substantial memory.
 * We can get away with this approach as the threads sharing the same
 * stack will not be executing, even though they will be ready to run.
 */
static K_THREAD_STACK_DEFINE(test_stack, TEST_STACK_SIZE);
/* One stack and one thread object per busy thread (CONFIG_MP_MAX_NUM_CPUS - 1 of them). */
K_THREAD_STACK_ARRAY_DEFINE(busy_stack, CONFIG_MP_MAX_NUM_CPUS - 1, BUSY_STACK_SIZE);
static struct k_thread busy_thread[CONFIG_MP_MAX_NUM_CPUS - 1];
/* Thread objects for the test threads that are moved in and out of the ready queue. */
static struct k_thread test_thread[CONFIG_BENCHMARK_NUM_THREADS];
/* Cycles accumulated, per slot, around the z_ready_thread() calls. */
static uint64_t add_cycles[CONFIG_BENCHMARK_NUM_THREADS];
/* Cycles accumulated, per slot, around the z_unready_thread() calls. */
static uint64_t remove_cycles[CONFIG_BENCHMARK_NUM_THREADS];
/*
 * Declared locally for use by the timing loops below.
 * NOTE(review): presumably the included headers do not expose a prototype
 * for this routine -- confirm.
 */
extern void z_unready_thread(struct k_thread *thread);
/**
 * Entry point of the busy threads created by start_threads().
 *
 * All three arguments are ignored; the routine spins forever and never
 * returns.
 */
static void busy_entry(void *p1, void *p2, void *p3)
{
	ARG_UNUSED(p1);
	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	for (;;) {
		/* Intentionally empty: keep spinning */
	}
}
/**
 * Entry point of the test threads. It is not expected to ever execute.
 *
 * If it does run, it reports the thread number carried in @a p1 and then
 * spins forever.
 */
static void test_entry(void *p1, void *p2, void *p3)
{
	unsigned int thread_id = (unsigned int)(uintptr_t)p1;

	ARG_UNUSED(p2);
	ARG_UNUSED(p3);

	printk("Thread %u unexpectedly executed\n", thread_id);

	for (;;) {
		/* Intentionally empty: never return */
	}
}
/**
 * Create the busy threads and the test threads.
 *
 * One busy thread is created per entry of busy_thread[]
 * (CONFIG_MP_MAX_NUM_CPUS - 1 of them), each at priority -1 and each on its
 * own stack. Then @a num_threads test threads are created, all on the single
 * shared test_stack. Test thread i is given priority i / bucket_size, where
 * bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1, so the
 * priority value never decreases as the index grows and always stays below
 * CONFIG_NUM_PREEMPT_PRIORITIES.
 *
 * @param num_threads Number of test threads to create. Must not exceed
 *                    CONFIG_BENCHMARK_NUM_THREADS, the size of test_thread[].
 */
static void start_threads(unsigned int num_threads)
{
	unsigned int i;
	unsigned int bucket_size;

	/* Start the busy threads to execute on the other processors */
	for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS - 1; i++) {
		k_thread_create(&busy_thread[i], busy_stack[i], BUSY_STACK_SIZE,
				busy_entry, NULL, NULL, NULL,
				-1, 0, K_NO_WAIT);
	}

	bucket_size = (num_threads / CONFIG_NUM_PREEMPT_PRIORITIES) + 1;

	/*
	 * The loop bound must be the same count bucket_size was derived from;
	 * otherwise i / bucket_size could leave the valid priority range.
	 */
	for (i = 0; i < num_threads; i++) {
		k_thread_create(&test_thread[i], test_stack, TEST_STACK_SIZE,
				test_entry, (void *)(uintptr_t)i, NULL, NULL,
				i / bucket_size, 0, K_NO_WAIT);
	}
}
/**
 * Clear the cycle accumulators.
 *
 * Zeroes the first @a num_threads entries of both add_cycles[] and
 * remove_cycles[].
 *
 * @param num_threads Number of leading entries to clear in each array.
 */
static void cycles_reset(unsigned int num_threads)
{
	unsigned int slot = 0;

	while (slot < num_threads) {
		add_cycles[slot] = 0ULL;
		remove_cycles[slot] = 0ULL;
		slot++;
	}
}
/**
 * Time one remove/add pass over the test threads.
 *
 * First every test thread is passed to z_unready_thread(), walking from the
 * highest index down to index 0. Then every test thread is passed to
 * z_ready_thread(), walking from index 0 upwards. The cycles spent in each
 * individual call are added to remove_cycles[] / add_cycles[] at the index
 * of the thread that was operated on.
 *
 * @param num_threads Number of test threads to cycle through.
 */
static void test_decreasing_priority(unsigned int num_threads)
{
	timing_t t_begin;
	timing_t t_end;
	unsigned int remaining = num_threads;
	unsigned int slot;

	/* Removal phase: highest index first */
	while (remaining > 0) {
		slot = remaining - 1;

		t_begin = timing_counter_get();
		z_unready_thread(&test_thread[slot]);
		t_end = timing_counter_get();

		remove_cycles[slot] += timing_cycles_get(&t_begin, &t_end);
		remaining--;
	}

	/* Insertion phase: lowest index first */
	slot = 0;
	while (slot < num_threads) {
		t_begin = timing_counter_get();
		z_ready_thread(&test_thread[slot]);
		t_end = timing_counter_get();

		add_cycles[slot] += timing_cycles_get(&t_begin, &t_end);
		slot++;
	}
}
/**
 * Time one remove/add pass over the test threads, in the opposite order to
 * test_decreasing_priority().
 *
 * First every test thread is passed to z_unready_thread(), walking from
 * index 0 upwards; the cycles for the thread at index k are added to
 * remove_cycles[num_threads - 1 - k]. Then every test thread is passed to
 * z_ready_thread(), walking from the highest index down; the cycles for the
 * thread at index k are added to add_cycles[num_threads - 1 - k].
 *
 * @param num_threads Number of test threads to cycle through.
 */
static void test_increasing_priority(unsigned int num_threads)
{
	timing_t t_begin;
	timing_t t_end;
	unsigned int step;

	/* Removal phase: thread index ascends while the result slot descends */
	for (step = 0; step < num_threads; step++) {
		t_begin = timing_counter_get();
		z_unready_thread(&test_thread[step]);
		t_end = timing_counter_get();

		remove_cycles[num_threads - step - 1] +=
			timing_cycles_get(&t_begin, &t_end);
	}

	/* Insertion phase: thread index descends while the result slot ascends */
	for (step = 0; step < num_threads; step++) {
		t_begin = timing_counter_get();
		z_ready_thread(&test_thread[num_threads - step - 1]);
		t_end = timing_counter_get();

		add_cycles[step] += timing_cycles_get(&t_begin, &t_end);
	}
}
/**
 * Integer square root.
 *
 * Builds the root one bit at a time, most significant bit first: at each
 * step the root found for the leading bits of @a square is doubled, and its
 * low bit is set only if the squared result still does not exceed the
 * (two bits longer) leading part of @a square.
 *
 * @param square Value to take the square root of.
 *
 * @return The largest integer whose square does not exceed @a square.
 */
static uint64_t sqrt_u64(uint64_t square)
{
	uint64_t root = 0;
	int shift;

	for (shift = 62; shift >= 0; shift -= 2) {
		uint64_t prefix = square >> shift;
		uint64_t doubled = root << 1;
		uint64_t candidate = doubled + 1;

		if ((candidate * candidate) > prefix) {
			root = doubled;
		} else {
			root = candidate;
		}
	}

	return root;
}
/**
 * Summarize a set of accumulated cycle counts and print the results.
 *
 * Each entry of @a cycles holds a total accumulated over @a num_iterations
 * runs. The minimum, maximum, average and standard deviation of the
 * per-iteration values (integer division, so fractions are truncated) are
 * computed and printed both in cycles and in nanoseconds.
 *
 * When CONFIG_BENCHMARK_RECORDING is defined the results are printed as
 * "REC:" lines built from @a tag and @a str; otherwise they are printed as a
 * short block headed by @a str and @a tag is unused.
 *
 * Nothing is printed when @a num_threads or @a num_iterations is zero, as
 * there is no data to summarize.
 *
 * @param num_threads    Number of entries of @a cycles to examine.
 * @param num_iterations Number of iterations accumulated into each entry.
 * @param cycles         Accumulated cycle counts.
 * @param tag            Tag prefix for the recorded output lines.
 * @param str            Description of what was measured.
 */
static void compute_and_report_stats(unsigned int num_threads, unsigned int num_iterations,
				     uint64_t *cycles, const char *tag, const char *str)
{
	uint64_t minimum;
	uint64_t maximum;
	uint64_t total;
	uint64_t average;
	uint64_t std_dev = 0;
	uint64_t tmp;
	uint64_t diff;
	unsigned int i;

	/* Avoid reading cycles[0] and dividing by zero when there is no data */
	if ((num_threads == 0U) || (num_iterations == 0U)) {
		return;
	}

	minimum = cycles[0];
	maximum = cycles[0];
	total = cycles[0];

	for (i = 1; i < num_threads; i++) {
		if (cycles[i] > maximum) {
			maximum = cycles[i];
		}
		if (cycles[i] < minimum) {
			minimum = cycles[i];
		}
		total += cycles[i];
	}

	minimum /= (uint64_t)num_iterations;
	maximum /= (uint64_t)num_iterations;

	/* Widen before multiplying so the divisor cannot wrap in 32 bits */
	average = total / ((uint64_t)num_threads * (uint64_t)num_iterations);

	/* Sum of squared distances between each per-iteration value and the average */
	for (i = 0; i < num_threads; i++) {
		tmp = cycles[i] / num_iterations;
		diff = (average > tmp) ? (average - tmp) : (tmp - average);
		std_dev += (diff * diff);
	}
	std_dev /= num_threads;
	std_dev = sqrt_u64(std_dev);

#ifdef CONFIG_BENCHMARK_RECORDING
	int tag_len = strlen(tag);
	int descr_len = strlen(str);
	int stag_len = strlen(".stddev");
	int sdescr_len = strlen(", stddev.");

	/* Pad the suffix fields so that tag+suffix spans 40 and str+suffix spans 50 columns */
	stag_len = (tag_len + stag_len < 40) ? 40 - tag_len : stag_len;
	sdescr_len = (descr_len + sdescr_len < 50) ? 50 - descr_len : sdescr_len;

	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".min", str,
	       sdescr_len, ", min.", minimum, (uint32_t)timing_cycles_to_ns(minimum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".max", str,
	       sdescr_len, ", max.", maximum, (uint32_t)timing_cycles_to_ns(maximum));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".avg", str,
	       sdescr_len, ", avg.", average, (uint32_t)timing_cycles_to_ns(average));
	printk("REC: %s%-*s - %s%-*s : %7llu cycles , %7u ns :\n", tag, stag_len, ".stddev", str,
	       sdescr_len, ", stddev.", std_dev, (uint32_t)timing_cycles_to_ns(std_dev));
#else
	ARG_UNUSED(tag);

	printk("------------------------------------\n");
	printk("%s\n", str);
	printk(" Minimum : %7llu cycles (%7u nsec)\n", minimum,
	       (uint32_t)timing_cycles_to_ns(minimum));
	printk(" Maximum : %7llu cycles (%7u nsec)\n", maximum,
	       (uint32_t)timing_cycles_to_ns(maximum));
	printk(" Average : %7llu cycles (%7u nsec)\n", average,
	       (uint32_t)timing_cycles_to_ns(average));
	printk(" Std Deviation: %7llu cycles (%7u nsec)\n", std_dev,
	       (uint32_t)timing_cycles_to_ns(std_dev));
#endif
}
/**
 * Benchmark entry point.
 *
 * Creates the busy and test threads, then runs two measurement passes of
 * CONFIG_BENCHMARK_NUM_ITERATIONS iterations each (test_decreasing_priority()
 * followed by test_increasing_priority()). After each pass the accumulated
 * add and remove cycle counts are summarized with compute_and_report_stats();
 * when CONFIG_BENCHMARK_VERBOSE is defined a per-slot average is printed as
 * well. Finally the test threads are aborted and the end-of-test report is
 * emitted.
 *
 * @return Always 0.
 */
int main(void)
{
unsigned int i;
unsigned int freq;
#ifdef CONFIG_BENCHMARK_VERBOSE
/* Scratch buffers and thread pointer used only for the per-slot verbose output */
char description[120];
char tag[50];
struct k_thread *thread;
#endif
timing_init();
bench_test_init();
freq = timing_freq_get_mhz();
/* Name the ready queue implementation selected by the build configuration */
printk("Time Measurements for %s sched queues\n",
IS_ENABLED(CONFIG_SCHED_SIMPLE) ? "simple" :
IS_ENABLED(CONFIG_SCHED_SCALABLE) ? "scalable" : "multiq");
printk("Timing results: Clock frequency: %u MHz\n", freq);
start_threads(CONFIG_BENCHMARK_NUM_THREADS);
timing_start();
/*
 * Pass 1: each iteration removes the test threads from the highest index
 * down and then adds them back from index 0 up. Slot i of add_cycles[] and
 * remove_cycles[] holds the totals for test_thread[i].
 */
cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
test_decreasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
}
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
add_cycles, "sched.add.thread.ReadyQ_tail",
"Add threads of decreasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
/* NOTE(review): PRINT_STATS_AVG is presumably provided by utils.h -- confirm */
for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
snprintf(tag, sizeof(tag),
"ReadyQ.add.to.tail.%04u.waiters", i);
snprintf(description, sizeof(description), "%-40s - Add thread of priority (%u)",
tag, test_thread[i].base.prio);
PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
CONFIG_BENCHMARK_NUM_ITERATIONS);
}
#endif
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
remove_cycles, "sched.remove.thread.ReadyQ_head",
"Remove threads of decreasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
snprintf(tag, sizeof(tag),
"ReadyQ.remove.from.head.%04u.waiters", i);
snprintf(description, sizeof(description),
"%-40s - Remove thread of priority %u",
tag, test_thread[i].base.prio);
PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
CONFIG_BENCHMARK_NUM_ITERATIONS);
}
#endif
/*
 * Pass 2: each iteration removes the test threads from index 0 up and then
 * adds them back from the highest index down. Slot i of add_cycles[] and
 * remove_cycles[] holds the totals for
 * test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1].
 */
cycles_reset(CONFIG_BENCHMARK_NUM_THREADS);
for (i = 0; i < CONFIG_BENCHMARK_NUM_ITERATIONS; i++) {
test_increasing_priority(CONFIG_BENCHMARK_NUM_THREADS);
}
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
add_cycles, "sched.add.thread.ReadyQ_head",
"Add threads of increasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
snprintf(tag, sizeof(tag),
"ReadyQ.add.to.head.%04u.waiters", i);
/* Look up the thread whose measurement is stored in slot i */
thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
snprintf(description, sizeof(description),
"%-40s - Add priority %u to readyq",
tag, thread->base.prio);
PRINT_STATS_AVG(description, (uint32_t)add_cycles[i],
CONFIG_BENCHMARK_NUM_ITERATIONS);
}
#endif
compute_and_report_stats(CONFIG_BENCHMARK_NUM_THREADS, CONFIG_BENCHMARK_NUM_ITERATIONS,
remove_cycles, "sched.remove.thread.ReadyQ_tail",
"Remove threads of increasing priority");
#ifdef CONFIG_BENCHMARK_VERBOSE
for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
/* Unlike the other verbose loops, the tag counts down from NUM_THREADS */
snprintf(tag, sizeof(tag),
"ReadyQ.remove.from.tail.%04u.waiters",
CONFIG_BENCHMARK_NUM_THREADS - i);
/* Look up the thread whose measurement is stored in slot i */
thread = &test_thread[CONFIG_BENCHMARK_NUM_THREADS - i - 1];
snprintf(description, sizeof(description),
"%-40s - Remove lowest priority from readyq (%u)",
tag, thread->base.prio);
PRINT_STATS_AVG(description, (uint32_t)remove_cycles[i],
CONFIG_BENCHMARK_NUM_ITERATIONS);
}
#endif
/* Only the test threads are aborted here; the busy threads are left as they are */
for (i = 0; i < CONFIG_BENCHMARK_NUM_THREADS; i++) {
k_thread_abort(&test_thread[i]);
}
timing_stop();
TC_END_REPORT(0);
return 0;
}