blob: f793b9c72eb21cc58e81ad97dbd41495864c3653 [file] [log] [blame]
/*
* Copyright (c) 2022 Meta
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdlib.h>
#include <zephyr/random/rand32.h>
#include <zephyr/sys/hash_function.h>
#include <zephyr/ztest.h>
BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_ENTRIES > 0);
BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_BUCKETS > 0);
BUILD_ASSERT(CONFIG_TEST_HASH_FUNC_NUM_ENTRIES >= 10 * CONFIG_TEST_HASH_FUNC_NUM_BUCKETS);
static void print_buckets(const char *label, float *buckets, size_t n)
{
if (IS_ENABLED(CONFIG_TEST_HASH_FUNC_DEBUG)) {
printk("%s\n", label);
for (size_t i = 0; i < n; ++i) {
printk("%f, ", (double)buckets[i]);
}
printk("\n");
}
}
static void create_histogram(float *buckets, size_t n)
{
uint32_t entry;
uint32_t hash;
size_t bucket;
for (size_t i = 0; i < CONFIG_TEST_HASH_FUNC_NUM_ENTRIES; ++i) {
/* generate a random value (note: could be any random data source) */
entry = sys_rand32_get();
/* hash the random value */
hash = sys_hash32(&entry, sizeof(entry));
/* mod the hash to determine which bucket to increment */
bucket = hash % CONFIG_TEST_HASH_FUNC_NUM_BUCKETS;
buckets[bucket]++;
}
}
static int compare_floats(const void *a, const void *b)
{
float aa = *(float *)a;
float bb = *(float *)b;
return (aa > bb) - (aa < bb);
}
static int kolmogorov_smirnov_test(float *buckets, size_t n)
{
float d;
float f_x;
float prev;
float f0_x;
float d_max;
float d_alpha_sq;
/* sort observations in ascending order */
qsort(buckets, n, sizeof(*buckets), compare_floats);
/* calculate the cdf of observations */
prev = 0;
for (size_t i = 0; i < n; ++i) {
buckets[i] += prev;
prev = buckets[i];
}
for (size_t i = 0; i < n; ++i) {
buckets[i] /= buckets[n - 1];
}
print_buckets("cdf", buckets, n);
/* Compute the absolute differences */
d_max = 0;
for (size_t i = 0; i < n; ++i) {
/* cdf of hypothesized distribution (uniform, in this case) */
f0_x = (float)(i + 1) / n;
/* cdf of empirical distribution */
f_x = buckets[i];
d = (f_x >= f0_x) ? (f_x - f0_x) : (f0_x - f_x);
if (d > d_max) {
d_max = d;
}
buckets[i] = d;
}
print_buckets("differences", buckets, n);
/*
* Calculate the critical value
*
* For n >= 40, the critical value can be estimated for various alpha.
*
* http://oak.ucc.nau.edu/rh83/Statistics/ks1/
*
* E.g. For alpha = 0.05, the estimator is 1.36 / sqrt(n)
*
* However, since we lack sqrt(n), we have to square both sides
* of the comparison. So,
*
* D > 1.36 / sqrt(n)
* D^2 > (1.36 / sqrt(n))^2
* D^2 > 1.8496 / n
*/
d_alpha_sq = 1.8496f / n;
if (IS_ENABLED(CONFIG_TEST_HASH_FUNC_DEBUG)) {
printk("d_max^2: %f\n", (double)(d_max * d_max));
printk("d_alpha^2: %f\n", (double)d_alpha_sq);
}
if (d_max * d_max > d_alpha_sq) {
return -EINVAL;
}
return 0;
}
ZTEST(hash_function, test_sys_hash32)
{
float buckets[CONFIG_TEST_HASH_FUNC_NUM_BUCKETS] = {0};
create_histogram(buckets, ARRAY_SIZE(buckets));
print_buckets("histogram", buckets, ARRAY_SIZE(buckets));
zassert_ok(kolmogorov_smirnov_test(buckets, ARRAY_SIZE(buckets)));
}
ZTEST_SUITE(hash_function, NULL, NULL, NULL, NULL, NULL);