/*
* Copyright (c) 2024 Nordic Semiconductor ASA
* SPDX-License-Identifier: Apache-2.0
*/
#include <string.h>
#include <zephyr/cache.h>
#include <zephyr/kernel.h>
#include <zephyr/sys/bitarray.h>
#include <zephyr/mem_mgmt/mem_attr.h>
#include "dmm.h"
#define _FILTER_MEM(node_id, fn) \
COND_CODE_1(DT_NODE_HAS_PROP(node_id, zephyr_memory_attr), (fn(node_id)), ())
#define DT_MEMORY_REGION_FOREACH_STATUS_OKAY_NODE(fn) \
DT_FOREACH_STATUS_OKAY_NODE_VARGS(_FILTER_MEM, fn)
#define __BUILD_LINKER_END_VAR(_name) DT_CAT3(__, _name, _end)
#define _BUILD_LINKER_END_VAR(node_id) \
__BUILD_LINKER_END_VAR(DT_STRING_UNQUOTED(node_id, zephyr_memory_region))
#define _BUILD_MEM_REGION(node_id) \
{.dt_addr = DT_REG_ADDR(node_id), \
.dt_size = DT_REG_SIZE(node_id), \
.dt_attr = DT_PROP(node_id, zephyr_memory_attr), \
.dt_align = DMM_REG_ALIGN_SIZE(node_id), \
.dt_allc = &_BUILD_LINKER_END_VAR(node_id)},
#define HEAP_NUM_WORDS (CONFIG_DMM_HEAP_CHUNKS / 32)
BUILD_ASSERT(IS_ALIGNED(CONFIG_DMM_HEAP_CHUNKS, 32));
/* Generate declarations of the linker variables used to determine the size of preallocated
 * variables stored in memory sections that span the memory regions.
 * They are used to determine how much memory is left for the dynamic bounce buffer
 * allocator to work with.
 */
#define _DECLARE_LINKER_VARS(node_id) extern uint32_t _BUILD_LINKER_END_VAR(node_id);
DT_MEMORY_REGION_FOREACH_STATUS_OKAY_NODE(_DECLARE_LINKER_VARS);
struct dmm_region {
uintptr_t dt_addr;
size_t dt_size;
uint32_t dt_attr;
uint32_t dt_align;
void *dt_allc;
};
struct dmm_heap {
uint32_t mask[HEAP_NUM_WORDS];
atomic_t tail_mask[HEAP_NUM_WORDS];
uintptr_t ptr;
uintptr_t ptr_end;
size_t blk_size;
const struct dmm_region *region;
sys_bitarray_t bitarray;
#ifdef CONFIG_DMM_STATS
atomic_t curr_use;
uint32_t max_use;
struct k_spinlock lock;
#endif
};
static const struct dmm_region dmm_regions[] = {
DT_MEMORY_REGION_FOREACH_STATUS_OKAY_NODE(_BUILD_MEM_REGION)
};
static struct {
struct dmm_heap dmm_heaps[ARRAY_SIZE(dmm_regions)];
} dmm_heaps_data;
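/* Find the heap that manages the memory region whose start address matches @p region. */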
static struct dmm_heap *dmm_heap_find(void *region)
{
struct dmm_heap *dh;
for (size_t idx = 0; idx < ARRAY_SIZE(dmm_heaps_data.dmm_heaps); idx++) {
dh = &dmm_heaps_data.dmm_heaps[idx];
if (dh->region->dt_addr == (uintptr_t)region) {
return dh;
}
}
return NULL;
}
static bool is_region_cacheable(const struct dmm_region *region)
{
return (IS_ENABLED(CONFIG_DCACHE) && (region->dt_attr & DT_MEM_CACHEABLE));
}
static bool is_buffer_within_region(uintptr_t start, size_t size,
uintptr_t reg_start, size_t reg_size)
{
return ((start >= reg_start) && ((start + size) <= (reg_start + reg_size)));
}
static bool is_user_buffer_correctly_preallocated(void const *user_buffer, size_t user_length,
const struct dmm_region *region)
{
uintptr_t addr = (uintptr_t)user_buffer;
if (!is_buffer_within_region(addr, user_length, region->dt_addr, region->dt_size)) {
return false;
}
if (!is_region_cacheable(region)) {
/* Buffer is contained within non-cacheable region - use it as it is. */
return true;
}
if (IS_ALIGNED(addr, region->dt_align)) {
/* If buffer is in cacheable region it must be aligned to data cache line size. */
return true;
}
return false;
}
/* Update the tail bits mask after an allocation. Tail bits are all bits of the allocated
 * buffer except the head bit. Together with the index of the buffer's first chunk (known
 * at free time from the buffer address), the tail bits mask allows determining the size of
 * the buffer (how many chunks it spans). Because tail_mask is updated after the allocation,
 * the bits that represent the allocated buffer can be modified safely; an atomic operation
 * is only needed because other bits in the same mask word may be modified concurrently.
 */
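/* Example (hypothetical numbers): allocating a buffer that spans 3 chunks starting at
 * chunk index 5 sets tail bits 6 and 7 in tail_mask; the head bit (index 5) is tracked
 * only in the bitarray. On free, num_bits_get() counts 1 (head) + 2 (set tail bits) and
 * returns 3.
 */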
static void tail_mask_set(atomic_t *tail_mask, size_t num_bits, size_t off)
{
size_t tail_bits = num_bits - 1;
size_t tail_off = off + 1;
if (tail_bits == 0) {
return;
}
if (HEAP_NUM_WORDS == 1) {
atomic_or(tail_mask, BIT_MASK(tail_bits) << tail_off);
return;
}
size_t idx = tail_off / 32;
atomic_t *t_mask = &tail_mask[idx];
tail_off = tail_off % 32;
while (tail_bits > 0) {
uint32_t bits = MIN(32 - tail_off, tail_bits);
uint32_t mask = (bits == 32) ? UINT32_MAX : (BIT_MASK(bits) << tail_off);
atomic_or(t_mask, mask);
t_mask++;
tail_off = 0;
tail_bits -= bits;
}
}
/* Determine how many chunks were used for an allocated buffer. The count is derived from
 * the tail bits mask and the index of the starting chunk (@p off).
 * The function is called before the bits are freed in the bitarray, so the bits that belong
 * to that buffer can be modified safely.
 *
 * @param tail_mask Pointer to the tail_mask array.
 * @param off Index of the first chunk of the buffer.
 *
 * @return Number of chunks that form the buffer being freed.
 */
static uint32_t num_bits_get(atomic_t *tail_mask, size_t off)
{
uint32_t num_bits = 1;
size_t tail_off = off + 1;
size_t idx = tail_off / 32;
if (idx >= HEAP_NUM_WORDS) {
/* Head chunk is the last chunk in the heap - there are no tail bits to read. */
return num_bits;
}
atomic_t *t_mask = &tail_mask[idx];
tail_off = tail_off % 32;
do {
uint32_t mask = (uint32_t)*t_mask >> tail_off;
if (mask == UINT32_MAX) {
num_bits += 32;
atomic_set(t_mask, 0);
} else {
uint32_t bits = __builtin_ctz(~mask);
if (bits == 0) {
break;
}
num_bits += bits;
atomic_and(t_mask, ~(BIT_MASK(bits) << tail_off));
if (bits + tail_off < 32) {
break;
}
tail_off = 0;
}
t_mask++;
} while ((HEAP_NUM_WORDS > 1) && (t_mask != &tail_mask[HEAP_NUM_WORDS]));
return num_bits;
}
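/* Allocate a bounce buffer of at least @p length bytes from the heap's chunk allocator.
 * Returns NULL if the heap is not initialized or a contiguous run of chunks is not available.
 */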
static void *dmm_buffer_alloc(struct dmm_heap *dh, size_t length)
{
size_t num_bits, off;
int rv;
if (dh->ptr == 0) {
/* Not initialized. */
return NULL;
}
length = ROUND_UP(length, dh->region->dt_align);
num_bits = DIV_ROUND_UP(length, dh->blk_size);
rv = sys_bitarray_alloc(&dh->bitarray, num_bits, &off);
if (rv < 0) {
return NULL;
}
tail_mask_set(dh->tail_mask, num_bits, off);
#ifdef CONFIG_DMM_STATS
k_spinlock_key_t key;
key = k_spin_lock(&dh->lock);
dh->curr_use += num_bits;
dh->max_use = MAX(dh->max_use, dh->curr_use);
k_spin_unlock(&dh->lock, key);
#endif
return (void *)(dh->ptr + dh->blk_size * off);
}
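/* Free a bounce buffer previously returned by dmm_buffer_alloc(). The number of chunks to
 * release is recovered from the tail bits mask.
 */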
static void dmm_buffer_free(struct dmm_heap *dh, void *buffer)
{
size_t offset = ((uintptr_t)buffer - dh->ptr) / dh->blk_size;
size_t num_bits = num_bits_get(dh->tail_mask, offset);
int rv;
#ifdef CONFIG_DMM_STATS
atomic_sub(&dh->curr_use, num_bits);
#endif
rv = sys_bitarray_free(&dh->bitarray, num_bits, offset);
(void)rv;
__ASSERT_NO_MSG(rv == 0);
}
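/* Copy helper that uses 64-bit or 32-bit wide accesses when the length and both pointers
 * are suitably aligned, falling back to memcpy() otherwise.
 */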
static void dmm_memcpy(void *dst, const void *src, size_t len)
{
#define IS_ALIGNED32(x) IS_ALIGNED(x, sizeof(uint32_t))
#define IS_ALIGNED64(x) IS_ALIGNED(x, sizeof(uint64_t))
if (IS_ALIGNED64(len) && IS_ALIGNED64(dst) && IS_ALIGNED64(src)) {
for (uint32_t i = 0; i < len / sizeof(uint64_t); i++) {
((uint64_t *)dst)[i] = ((const uint64_t *)src)[i];
}
return;
}
if (IS_ALIGNED32(len) && IS_ALIGNED32(dst) && IS_ALIGNED32(src)) {
for (uint32_t i = 0; i < len / sizeof(uint32_t); i++) {
((uint32_t *)dst)[i] = ((const uint32_t *)src)[i];
}
return;
}
memcpy(dst, src, len);
}
int dmm_buffer_out_prepare(void *region, void const *user_buffer, size_t user_length,
void **buffer_out)
{
struct dmm_heap *dh;
if (user_length == 0) {
/* Assume that zero-length buffers are correct as they are. */
*buffer_out = (void *)user_buffer;
return 0;
}
/* Get memory region that specified device can perform DMA transfers from */
dh = dmm_heap_find(region);
if (dh == NULL) {
return -EINVAL;
}
/* Check if:
* - provided user buffer is already in correct memory region,
* - provided user buffer is aligned and padded to cache line,
* if it is located in cacheable region.
*/
if (is_user_buffer_correctly_preallocated(user_buffer, user_length, dh->region)) {
/* If yes, assign user_buffer to buffer_out. */
*buffer_out = (void *)user_buffer;
} else {
/* If no:
* - dynamically allocate buffer in correct memory region that respects cache line
* alignment and padding
*/
*buffer_out = dmm_buffer_alloc(dh, user_length);
/* Return error if dynamic allocation fails */
if (*buffer_out == NULL) {
return -ENOMEM;
}
/* - copy user buffer contents into allocated buffer */
dmm_memcpy(*buffer_out, user_buffer, user_length);
}
/* Check if device memory region is cacheable
* If yes, writeback all cache lines associated with output buffer
* (either user or allocated)
*/
if (is_region_cacheable(dh->region)) {
sys_cache_data_flush_range(*buffer_out, user_length);
}
/* If no, no action is needed */
return 0;
}
int dmm_buffer_out_release(void *region, void *buffer_out)
{
struct dmm_heap *dh;
uintptr_t addr = (uintptr_t)buffer_out;
/* Get memory region that specified device can perform DMA transfers from */
dh = dmm_heap_find(region);
if (dh == NULL) {
return -EINVAL;
}
/* Check if output buffer is contained within memory area
* managed by dynamic memory allocator
*/
if (is_buffer_within_region(addr, 0, dh->ptr, dh->ptr_end - dh->ptr)) {
/* If yes, free the buffer */
dmm_buffer_free(dh, buffer_out);
}
/* If no, no action is needed */
return 0;
}
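/* Usage sketch for the outgoing (TX) path. Illustrative only: mem_reg and start_dma_tx()
 * are hypothetical placeholders for the device's DMA-capable memory region and the
 * driver's transfer start routine.
 *
 *	void *dma_buf;
 *
 *	if (dmm_buffer_out_prepare(mem_reg, tx_data, tx_len, &dma_buf) == 0) {
 *		start_dma_tx(dma_buf, tx_len);
 *		// ... wait for the transfer to complete ...
 *		dmm_buffer_out_release(mem_reg, dma_buf);
 *	}
 */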
int dmm_buffer_in_prepare(void *region, void *user_buffer, size_t user_length, void **buffer_in)
{
struct dmm_heap *dh;
if (user_length == 0) {
/* Assume that zero-length buffers are correct as they are. */
*buffer_in = (void *)user_buffer;
return 0;
}
/* Get memory region that specified device can perform DMA transfers to */
dh = dmm_heap_find(region);
if (dh == NULL) {
return -EINVAL;
}
/* Check if:
* - provided user buffer is already in correct memory region,
* - provided user buffer is aligned and padded to cache line,
* if it is located in cacheable region.
*/
if (is_user_buffer_correctly_preallocated(user_buffer, user_length, dh->region)) {
/* If yes, assign user_buffer to buffer_in. */
*buffer_in = user_buffer;
} else {
/* If no, dynamically allocate buffer in correct memory region that respects cache
* line alignment and padding
*/
*buffer_in = dmm_buffer_alloc(dh, user_length);
/* Return error if dynamic allocation fails */
if (*buffer_in == NULL) {
return -ENOMEM;
}
}
/* Check if device memory region is cacheable
* If yes, invalidate all cache lines associated with input buffer
* (either user or allocated) to clear potential dirty bits.
*/
if (is_region_cacheable(dh->region)) {
sys_cache_data_invd_range(*buffer_in, user_length);
}
/* If no, no action is needed */
return 0;
}
int dmm_buffer_in_release(void *region, void *user_buffer, size_t user_length, void *buffer_in)
{
struct dmm_heap *dh;
uintptr_t addr = (uintptr_t)buffer_in;
/* Get memory region that specified device can perform DMA transfers to, using devicetree */
dh = dmm_heap_find(region);
if (dh == NULL) {
return -EINVAL;
}
/* Check if device memory region is cacheable
* If yes, invalidate all cache lines associated with input buffer
* (either user or allocated)
*/
if (is_region_cacheable(dh->region)) {
sys_cache_data_invd_range(buffer_in, user_length);
}
/* If no, no action is needed */
/* Check if the user buffer and the allocated buffer point to the same memory location.
 * If not, copy the allocated buffer back to the user buffer.
 */
if (buffer_in != user_buffer) {
dmm_memcpy(user_buffer, buffer_in, user_length);
}
/* If yes, no action is needed */
/* Check if input buffer is contained within memory area
* managed by dynamic memory allocator
*/
if (is_buffer_within_region(addr, user_length, dh->ptr, dh->ptr_end - dh->ptr)) {
/* If yes, free the buffer */
dmm_buffer_free(dh, buffer_in);
}
/* If no, no action is needed */
return 0;
}
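/* Usage sketch for the incoming (RX) path. Illustrative only: mem_reg and start_dma_rx()
 * are hypothetical placeholders.
 *
 *	void *dma_buf;
 *
 *	if (dmm_buffer_in_prepare(mem_reg, rx_data, rx_len, &dma_buf) == 0) {
 *		start_dma_rx(dma_buf, rx_len);
 *		// ... wait for the transfer to complete ...
 *		dmm_buffer_in_release(mem_reg, rx_data, rx_len, dma_buf);
 *	}
 */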
int dmm_stats_get(void *region, uintptr_t *start_addr, uint32_t *curr_use, uint32_t *max_use)
{
#ifdef CONFIG_DMM_STATS
struct dmm_heap *dh;
dh = dmm_heap_find(region);
if (dh == NULL) {
return -EINVAL;
}
if (start_addr) {
*start_addr = dh->ptr;
}
if (curr_use) {
*curr_use = (100 * dh->curr_use) / dh->bitarray.num_bits;
}
if (max_use) {
*max_use = (100 * dh->max_use) / dh->bitarray.num_bits;
}
return 0;
#else
return -ENOTSUP;
#endif
}
int dmm_init(void)
{
struct dmm_heap *dh;
int blk_cnt;
int heap_space;
for (size_t idx = 0; idx < ARRAY_SIZE(dmm_regions); idx++) {
dh = &dmm_heaps_data.dmm_heaps[idx];
dh->region = &dmm_regions[idx];
dh->ptr = ROUND_UP(dh->region->dt_allc, dh->region->dt_align);
heap_space = dh->region->dt_size - (dh->ptr - dh->region->dt_addr);
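/* Illustrative sizing (hypothetical numbers): with 16 KiB of heap space,
 * CONFIG_DMM_HEAP_CHUNKS == 32 and a 32-byte alignment, blk_size becomes
 * ROUND_UP(16384 / 32, 32) == 512 bytes and blk_cnt == 32 chunks.
 */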
dh->blk_size = ROUND_UP(heap_space / (32 * HEAP_NUM_WORDS), dh->region->dt_align);
blk_cnt = heap_space / dh->blk_size;
dh->ptr_end = dh->ptr + blk_cnt * dh->blk_size;
dh->bitarray.num_bits = blk_cnt;
dh->bitarray.num_bundles = HEAP_NUM_WORDS;
dh->bitarray.bundles = dh->mask;
}
return 0;
}