include/zephyr/arch/xtensa/cache.h - third_party/github/zephyrproject-rtos/zephyr - Git at Google

 /*
  * Copyright 2021 Intel Corporation
  * SPDX-License-Identifier: Apache-2.0
  */
 #ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
 #define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_

 #include <xtensa/config/core-isa.h>
 #include <zephyr/toolchain.h>
 #include <zephyr/sys/util.h>
 #include <zephyr/debug/sparse.h>

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* Size in bytes of one data cache way: the total data cache size
  * divided by the number of ways.
  */
 #define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)

 #if XCHAL_DCACHE_SIZE
 /* Only checked when a data cache is configured: both the line size
  * and the per-way size must be powers of two.
  */
 BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
 BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
 #endif

 /* Flush a memory range from the data cache.
  *
  * Issues one "dhwb" (data cache hit writeback) per cache line that
  * overlaps [addr, addr + bytes).  The start is rounded down and the
  * end rounded up to a line boundary.  Does nothing when bytes is zero
  * or when no data cache is configured.
  */
 static ALWAYS_INLINE void z_xtensa_cache_flush(void *addr, size_t bytes)
 {
 #if XCHAL_DCACHE_SIZE
 	const size_t line_sz = XCHAL_DCACHE_LINESIZE;
 	size_t cur, end;

 	if (bytes == 0) {
 		return;
 	}

 	cur = ROUND_DOWN(addr, line_sz);
 	end = ROUND_UP(((long)addr) + bytes, line_sz);

 	while (cur < end) {
 		__asm__ volatile("dhwb %0, 0" :: "r"(cur));
 		cur += line_sz;
 	}
 #endif
 }

 /* Flush and invalidate a memory range in the data cache.
  *
  * Issues one "dhwbi" (data cache hit writeback + invalidate) per
  * cache line that overlaps [addr, addr + bytes).  The start is
  * rounded down and the end rounded up to a line boundary.  Does
  * nothing when bytes is zero or when no data cache is configured.
  */
 static ALWAYS_INLINE void z_xtensa_cache_flush_inv(void *addr, size_t bytes)
 {
 #if XCHAL_DCACHE_SIZE
 	const size_t line_sz = XCHAL_DCACHE_LINESIZE;
 	size_t cur, end;

 	if (bytes == 0) {
 		return;
 	}

 	cur = ROUND_DOWN(addr, line_sz);
 	end = ROUND_UP(((long)addr) + bytes, line_sz);

 	while (cur < end) {
 		__asm__ volatile("dhwbi %0, 0" :: "r"(cur));
 		cur += line_sz;
 	}
 #endif
 }

 /* Invalidate a memory range in the data cache.
  *
  * Issues one "dhi" (data cache hit invalidate) per cache line that
  * overlaps [addr, addr + bytes).  The start is rounded down and the
  * end rounded up to a line boundary, so partially covered lines are
  * invalidated in full.  Does nothing when bytes is zero or when no
  * data cache is configured.
  */
 static ALWAYS_INLINE void z_xtensa_cache_inv(void *addr, size_t bytes)
 {
 #if XCHAL_DCACHE_SIZE
 	const size_t line_sz = XCHAL_DCACHE_LINESIZE;
 	size_t cur, end;

 	if (bytes == 0) {
 		return;
 	}

 	cur = ROUND_DOWN(addr, line_sz);
 	end = ROUND_UP(((long)addr) + bytes, line_sz);

 	while (cur < end) {
 		__asm__ volatile("dhi %0, 0" :: "r"(cur));
 		cur += line_sz;
 	}
 #endif
 }

 /**
  * @brief Invalidate every line of the L1 data cache
  *
  * Walks the whole cache by index using "dii" (data cache index
  * invalidate).  Index-based instructions are required here: the
  * hit-based "dhi" only acts on a line when the supplied address is
  * currently cached, so sweeping an arbitrary address window with it
  * leaves lines holding other addresses untouched.
  *
  * Compiles to nothing when no data cache is configured.
  */
 static ALWAYS_INLINE void z_xtensa_cache_inv_all(void)
 {
 #if XCHAL_DCACHE_SIZE
 	const size_t step = XCHAL_DCACHE_LINESIZE;
 	const size_t total = XCHAL_DCACHE_SIZE;
 	size_t line;

 	/* Covering the full cache size visits every index in every way */
 	for (line = 0; line < total; line += step) {
 		__asm__ volatile("dii %0, 0" :: "r"(line));
 	}
 #endif
 }

 /**
  * @brief Write back every dirty line of the L1 data cache
  *
  * Walks the whole cache by index using "diwb" (data cache index
  * writeback).  Index-based instructions are required here: the
  * hit-based "dhwb" only acts on a line when the supplied address is
  * currently cached, so sweeping an arbitrary address window with it
  * leaves dirty lines holding other addresses unwritten.
  *
  * Compiles to nothing when no data cache is configured.
  */
 static ALWAYS_INLINE void z_xtensa_cache_flush_all(void)
 {
 #if XCHAL_DCACHE_SIZE
 	const size_t step = XCHAL_DCACHE_LINESIZE;
 	const size_t total = XCHAL_DCACHE_SIZE;
 	size_t line;

 	/* Covering the full cache size visits every index in every way */
 	for (line = 0; line < total; line += step) {
 		__asm__ volatile("diwb %0, 0" :: "r"(line));
 	}
 #endif
 }

 /**
  * @brief Write back and invalidate every line of the L1 data cache
  *
  * Walks the whole cache by index using "diwbi" (data cache index
  * writeback + invalidate).  Index-based instructions are required
  * here: the hit-based "dhwbi" only acts on a line when the supplied
  * address is currently cached, so sweeping an arbitrary address
  * window with it leaves lines holding other addresses untouched.
  *
  * Compiles to nothing when no data cache is configured.
  */
 static ALWAYS_INLINE void z_xtensa_cache_flush_inv_all(void)
 {
 #if XCHAL_DCACHE_SIZE
 	const size_t step = XCHAL_DCACHE_LINESIZE;
 	const size_t total = XCHAL_DCACHE_SIZE;
 	size_t line;

 	/* Covering the full cache size visits every index in every way */
 	for (line = 0; line < total; line += step) {
 		__asm__ volatile("diwbi %0, 0" :: "r"(line));
 	}
 #endif
 }


 #if defined(CONFIG_XTENSA_RPO_CACHE)
 #if defined(CONFIG_ARCH_HAS_COHERENCE)
 /* Report whether ptr lies in the uncached region: the region number
  * is the pointer value shifted right by 29 bits (the top three bits
  * of a 32-bit address), compared with CONFIG_XTENSA_UNCACHED_REGION.
  */
 static inline bool arch_mem_coherent(void *ptr)
 {
 	return (((size_t) ptr) >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
 }
 #endif

 /* Move an address from region "rfrom" to region "rto", where a
  * region number occupies the top three address bits.
  *
  * With constant region arguments this all folds at compile time.
  * If the two region numbers differ in exactly one bit, the
  * conversion is a single OR (bit set in the target) or AND-NOT (bit
  * clear in the target).  Otherwise the region field is masked out
  * and replaced, which takes two operations.
  */
 static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
 {
 	const uint32_t dest_bits = rto << 29;
 	const uint32_t diff_bits = (rto ^ rfrom) << 29;

 	if (!Z_IS_POW2(diff_bits)) {
 		/* General case: overwrite the whole 3-bit region field */
 		return (addr & ~(7U << 29)) | dest_bits;
 	}

 	return ((diff_bits & dest_bits) != 0) ? (addr | diff_bits)
 					      : (addr & ~diff_bits);
 }
 /**
  * @brief Return cached pointer to a RAM address
  *
  * On the Xtensa coherence architecture, addressable RAM is visible
  * through two separate 512MB regions whose L1 cache behaviour is
  * configured independently, so any pointer can be translated between
  * its cached and uncached alias.
  *
  * Given a pointer to any addressable object (in either region), this
  * returns the alias that goes through the L1 data cache.  Reads via
  * the result see values cached locally on the current CPU when
  * present; writes land in the cache first and are written back
  * later.
  *
  * @see arch_xtensa_uncached_ptr()
  *
  * @param ptr A pointer to a valid C object
  * @return A pointer to the same object via the L1 dcache
  */
 static inline void __sparse_cache *arch_xtensa_cached_ptr(void *ptr)
 {
 	uint32_t cached_addr = z_xtrpoflip((uint32_t) ptr,
 					   CONFIG_XTENSA_CACHED_REGION,
 					   CONFIG_XTENSA_UNCACHED_REGION);

 	return (__sparse_force void __sparse_cache *)cached_addr;
 }

 /**
  * @brief Return uncached pointer to a RAM address
  *
  * On the Xtensa coherence architecture, addressable RAM is visible
  * through two separate 512MB regions whose L1 cache behaviour is
  * configured independently, so any pointer can be translated between
  * its cached and uncached alias.
  *
  * Given a pointer to any addressable object (in either region), this
  * returns the alias that bypasses the L1 data cache.  Accesses via
  * the result neither inspect nor modify data held in the L1 cache.
  *
  * @see arch_xtensa_cached_ptr()
  *
  * @param ptr A pointer to a valid C object
  * @return A pointer to the same object bypassing the L1 dcache
  */
 static inline void *arch_xtensa_uncached_ptr(void __sparse_cache *ptr)
 {
 	uint32_t uncached_addr = z_xtrpoflip((uint32_t) ptr,
 					     CONFIG_XTENSA_UNCACHED_REGION,
 					     CONFIG_XTENSA_CACHED_REGION);

 	return (void *)uncached_addr;
 }

 /* Utility to generate an unrolled and optimal[1] code sequence to set
  * the RPO TLB registers (contra the HAL cacheattr macros, which
  * generate larger code and can't be called from C), based on the
  * KERNEL_COHERENCE configuration in use.  Selects RPO attribute "2"
  * for regions (including MMIO registers in region zero) which want to
  * bypass L1, "4" for the cached region which wants writeback, and
  * "15" (invalid) elsewhere.
  *
  * Note that on cores that have the "translation" option set, we need
  * to put an identity mapping in the high bits.  Also per spec
  * changing the current code region (by definition cached) requires
  * that WITLB be followed by an ISYNC and that both instructions live
  * in the same cache line (two 3-byte instructions fit in an 8-byte
  * aligned region, so that's guaranteed not to cross a cache line
  * boundary).
  *
  * [1] With the sole exception of gcc's infuriating insistence on
  * emitting a precomputed literal for addr + addrincr instead of
  * computing it with a single ADD instruction from values it already
  * has in registers.  Explicitly assigning the variables to registers
  * via an attribute works, but then emits needless MOV instructions
  * instead.  I tell myself it's just 32 bytes of .text, but... Sigh.
  */

 /* Attribute value for region number r.  The tests are evaluated in
  * order: region 0 -> 2, the cached region -> 4, the uncached
  * region -> 2, anything else -> 15.
  */
 #define _REGION_ATTR(r)						\
 	((r) == 0 ? 2 :						\
 	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 :		\
 	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))

 /* Program the data and instruction TLB entries for one region.
  *
  * Expects variables "addr" (base address of the region being
  * programmed) and "addrincr" (per-region stride) to exist in the
  * expanding scope; addr is advanced by addrincr at the end so that
  * consecutive expansions walk consecutive regions.
  *
  * The attribute comes from _REGION_ATTR(region); when
  * XCHAL_HAVE_XLT_CACHEATTR is nonzero the region address is OR-ed in
  * as well.  Every region except the cached one gets a plain
  * wdtlb/witlb pair.  For the cached region the wdtlb is issued first,
  * then a jump to an 8-byte aligned label, then witlb immediately
  * followed by isync.
  */
 #define _SET_ONE_TLB(region) do {				\
 	uint32_t attr = _REGION_ATTR(region);			\
 	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
 		attr |= addr; /* RPO with translation */	\
 	}							\
 	if (region != CONFIG_XTENSA_CACHED_REGION) {		\
 		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
 				 :: "r"(attr), "r"(addr));	\
 	} else {						\
 		__asm__ volatile("wdtlb %0, %1"			\
 				 :: "r"(attr), "r"(addr));	\
 		__asm__ volatile("j 1f; .align 8; 1:");		\
 		__asm__ volatile("witlb %0, %1; isync"		\
 				 :: "r"(attr), "r"(addr));	\
 	}							\
 	addr += addrincr;					\
 } while (0)

 /* Set the TLB attributes of all eight regions.  Expands _SET_ONE_TLB
  * once for each of regions 0..7, starting at address 0 and stepping
  * by 0x20000000 (512MB) per region.
  */
 #define ARCH_XTENSA_SET_RPO_TLB() do {				\
 	register uint32_t addr = 0, addrincr = 0x20000000;	\
 	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
 } while (0)

 #endif

 #ifdef __cplusplus
 } /* extern "C" */
 #endif

 #endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */
	/*
	* Copyright 2021 Intel Corporation
	* SPDX-License-Identifier: Apache-2.0
	*/
	#ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
	#define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_

	#include <xtensa/config/core-isa.h>
	#include <zephyr/toolchain.h>
	#include <zephyr/sys/util.h>
	#include <zephyr/debug/sparse.h>

	#ifdef __cplusplus
	extern "C" {
	#endif

	/* Size in bytes of one data cache way: the total data cache size
	 * divided by the number of ways.
	 */
	#define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)

	#if XCHAL_DCACHE_SIZE
	/* Only checked when a data cache is configured: both the line size
	 * and the per-way size must be powers of two.
	 */
	BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
	BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
	#endif

	/* Flush a memory range from the data cache.
	 *
	 * Issues one "dhwb" (data cache hit writeback) per cache line that
	 * overlaps [addr, addr + bytes).  The start is rounded down and the
	 * end rounded up to a line boundary.  Does nothing when bytes is
	 * zero or when no data cache is configured.
	 */
	static ALWAYS_INLINE void z_xtensa_cache_flush(void *addr, size_t bytes)
	{
	#if XCHAL_DCACHE_SIZE
		const size_t line_sz = XCHAL_DCACHE_LINESIZE;
		size_t cur, end;

		if (bytes == 0) {
			return;
		}

		cur = ROUND_DOWN(addr, line_sz);
		end = ROUND_UP(((long)addr) + bytes, line_sz);

		while (cur < end) {
			__asm__ volatile("dhwb %0, 0" :: "r"(cur));
			cur += line_sz;
		}
	#endif
	}

	/* Flush and invalidate a memory range in the data cache.
	 *
	 * Issues one "dhwbi" (data cache hit writeback + invalidate) per
	 * cache line that overlaps [addr, addr + bytes).  The start is
	 * rounded down and the end rounded up to a line boundary.  Does
	 * nothing when bytes is zero or when no data cache is configured.
	 */
	static ALWAYS_INLINE void z_xtensa_cache_flush_inv(void *addr, size_t bytes)
	{
	#if XCHAL_DCACHE_SIZE
		const size_t line_sz = XCHAL_DCACHE_LINESIZE;
		size_t cur, end;

		if (bytes == 0) {
			return;
		}

		cur = ROUND_DOWN(addr, line_sz);
		end = ROUND_UP(((long)addr) + bytes, line_sz);

		while (cur < end) {
			__asm__ volatile("dhwbi %0, 0" :: "r"(cur));
			cur += line_sz;
		}
	#endif
	}

	/* Invalidate a memory range in the data cache.
	 *
	 * Issues one "dhi" (data cache hit invalidate) per cache line that
	 * overlaps [addr, addr + bytes).  The start is rounded down and the
	 * end rounded up to a line boundary, so partially covered lines are
	 * invalidated in full.  Does nothing when bytes is zero or when no
	 * data cache is configured.
	 */
	static ALWAYS_INLINE void z_xtensa_cache_inv(void *addr, size_t bytes)
	{
	#if XCHAL_DCACHE_SIZE
		const size_t line_sz = XCHAL_DCACHE_LINESIZE;
		size_t cur, end;

		if (bytes == 0) {
			return;
		}

		cur = ROUND_DOWN(addr, line_sz);
		end = ROUND_UP(((long)addr) + bytes, line_sz);

		while (cur < end) {
			__asm__ volatile("dhi %0, 0" :: "r"(cur));
			cur += line_sz;
		}
	#endif
	}

	/**
	 * @brief Invalidate every line of the L1 data cache
	 *
	 * Walks the whole cache by index using "dii" (data cache index
	 * invalidate).  Index-based instructions are required here: the
	 * hit-based "dhi" only acts on a line when the supplied address is
	 * currently cached, so sweeping an arbitrary address window with it
	 * leaves lines holding other addresses untouched.
	 *
	 * Compiles to nothing when no data cache is configured.
	 */
	static ALWAYS_INLINE void z_xtensa_cache_inv_all(void)
	{
	#if XCHAL_DCACHE_SIZE
		const size_t step = XCHAL_DCACHE_LINESIZE;
		const size_t total = XCHAL_DCACHE_SIZE;
		size_t line;

		/* Covering the full cache size visits every index in every way */
		for (line = 0; line < total; line += step) {
			__asm__ volatile("dii %0, 0" :: "r"(line));
		}
	#endif
	}

	/**
	 * @brief Write back every dirty line of the L1 data cache
	 *
	 * Walks the whole cache by index using "diwb" (data cache index
	 * writeback).  Index-based instructions are required here: the
	 * hit-based "dhwb" only acts on a line when the supplied address is
	 * currently cached, so sweeping an arbitrary address window with it
	 * leaves dirty lines holding other addresses unwritten.
	 *
	 * Compiles to nothing when no data cache is configured.
	 */
	static ALWAYS_INLINE void z_xtensa_cache_flush_all(void)
	{
	#if XCHAL_DCACHE_SIZE
		const size_t step = XCHAL_DCACHE_LINESIZE;
		const size_t total = XCHAL_DCACHE_SIZE;
		size_t line;

		/* Covering the full cache size visits every index in every way */
		for (line = 0; line < total; line += step) {
			__asm__ volatile("diwb %0, 0" :: "r"(line));
		}
	#endif
	}

	/**
	 * @brief Write back and invalidate every line of the L1 data cache
	 *
	 * Walks the whole cache by index using "diwbi" (data cache index
	 * writeback + invalidate).  Index-based instructions are required
	 * here: the hit-based "dhwbi" only acts on a line when the supplied
	 * address is currently cached, so sweeping an arbitrary address
	 * window with it leaves lines holding other addresses untouched.
	 *
	 * Compiles to nothing when no data cache is configured.
	 */
	static ALWAYS_INLINE void z_xtensa_cache_flush_inv_all(void)
	{
	#if XCHAL_DCACHE_SIZE
		const size_t step = XCHAL_DCACHE_LINESIZE;
		const size_t total = XCHAL_DCACHE_SIZE;
		size_t line;

		/* Covering the full cache size visits every index in every way */
		for (line = 0; line < total; line += step) {
			__asm__ volatile("diwbi %0, 0" :: "r"(line));
		}
	#endif
	}


	#if defined(CONFIG_XTENSA_RPO_CACHE)
	#if defined(CONFIG_ARCH_HAS_COHERENCE)
	/* Report whether ptr lies in the uncached region: the region number
	 * is the pointer value shifted right by 29 bits (the top three bits
	 * of a 32-bit address), compared with CONFIG_XTENSA_UNCACHED_REGION.
	 */
	static inline bool arch_mem_coherent(void *ptr)
	{
		return (((size_t) ptr) >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
	}
	#endif

	/* Move an address from region "rfrom" to region "rto", where a
	 * region number occupies the top three address bits.
	 *
	 * The math here is all compile-time: when the two regions differ
	 * by a power of two, we can convert between them by setting or
	 * clearing just one bit.  Otherwise it needs two operations.
	 */
	static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
	{
		uint32_t rxor = (rto ^ rfrom) << 29;

		rto <<= 29;
		if (Z_IS_POW2(rxor)) {
			if ((rxor & rto) == 0) {
				/* Differing bit is clear in the target region */
				return addr & ~rxor;
			} else {
				/* Differing bit is set in the target region */
				return addr | rxor;
			}
		} else {
			/* Replace the whole 3-bit region field */
			return (addr & ~(7U << 29)) | rto;
		}
	}
	/**
	* @brief Return cached pointer to a RAM address
	*
	* The Xtensa coherence architecture maps addressable RAM twice, in
	* two different 512MB regions whose L1 cache settings can be
	* controlled independently. So for any given pointer, it is possible
	* to convert it to and from a cached version.
	*
	* This function takes a pointer to any addressable object (either in
	* cacheable memory or not) and returns a pointer that can be used to
	* refer to the same memory through the L1 data cache. Data read
	* through the resulting pointer will reflect locally cached values on
	* the current CPU if they exist, and writes will go first into the
	* cache and be written back later.
	*
	* @see arch_xtensa_uncached_ptr()
	*
	* @param ptr A pointer to a valid C object
	* @return A pointer to the same object via the L1 dcache
	*/
	static inline void __sparse_cache arch_xtensa_cached_ptr(void ptr)
	{
	return (__sparse_force void __sparse_cache *)z_xtrpoflip((uint32_t) ptr,
	CONFIG_XTENSA_CACHED_REGION,
	CONFIG_XTENSA_UNCACHED_REGION);
	}

	/**
	* @brief Return uncached pointer to a RAM address
	*
	* The Xtensa coherence architecture maps addressable RAM twice, in
	* two different 512MB regions whose L1 cache settings can be
	* controlled independently. So for any given pointer, it is possible
	* to convert it to and from a cached version.
	*
	* This function takes a pointer to any addressable object (either in
	* cacheable memory or not) and returns a pointer that can be used to
	* refer to the same memory while bypassing the L1 data cache. Data
	* in the L1 cache will not be inspected nor modified by the access.
	*
	* @see arch_xtensa_cached_ptr()
	*
	* @param ptr A pointer to a valid C object
	* @return A pointer to the same object bypassing the L1 dcache
	*/
	static inline void arch_xtensa_uncached_ptr(void __sparse_cache ptr)
	{
	return (void *)z_xtrpoflip((uint32_t) ptr,
	CONFIG_XTENSA_UNCACHED_REGION,
	CONFIG_XTENSA_CACHED_REGION);
	}

	/* Utility to generate an unrolled and optimal[1] code sequence to set
	 * the RPO TLB registers (contra the HAL cacheattr macros, which
	 * generate larger code and can't be called from C), based on the
	 * KERNEL_COHERENCE configuration in use. Selects RPO attribute "2"
	 * for regions (including MMIO registers in region zero) which want to
	 * bypass L1, "4" for the cached region which wants writeback, and
	 * "15" (invalid) elsewhere.
	 *
	 * Note that on cores that have the "translation" option set, we need
	 * to put an identity mapping in the high bits. Also per spec
	 * changing the current code region (by definition cached) requires
	 * that WITLB be followed by an ISYNC and that both instructions live
	 * in the same cache line (two 3-byte instructions fit in an 8-byte
	 * aligned region, so that's guaranteed not to cross a cache line
	 * boundary).
	 *
	 * [1] With the sole exception of gcc's infuriating insistence on
	 * emitting a precomputed literal for addr + addrincr instead of
	 * computing it with a single ADD instruction from values it already
	 * has in registers. Explicitly assigning the variables to registers
	 * via an attribute works, but then emits needless MOV instructions
	 * instead. I tell myself it's just 32 bytes of .text, but... Sigh.
	 */

	/* Attribute value for region number r.  The tests are evaluated in
	 * order: region 0 -> 2, the cached region -> 4, the uncached
	 * region -> 2, anything else -> 15.
	 */
	#define _REGION_ATTR(r) \
	((r) == 0 ? 2 : \
	((r) == CONFIG_XTENSA_CACHED_REGION ? 4 : \
	((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))

	/* Program the data and instruction TLB entries for one region.
	 *
	 * Expects variables "addr" (base address of the region being
	 * programmed) and "addrincr" (per-region stride) to exist in the
	 * expanding scope; addr is advanced by addrincr at the end so that
	 * consecutive expansions walk consecutive regions.
	 *
	 * The attribute comes from _REGION_ATTR(region); when
	 * XCHAL_HAVE_XLT_CACHEATTR is nonzero the region address is OR-ed in
	 * as well.  Every region except the cached one gets a plain
	 * wdtlb/witlb pair.  For the cached region the wdtlb is issued
	 * first, then a jump to an 8-byte aligned label, then witlb
	 * immediately followed by isync.
	 */
	#define _SET_ONE_TLB(region) do { \
		uint32_t attr = _REGION_ATTR(region); \
		if (XCHAL_HAVE_XLT_CACHEATTR) { \
			attr |= addr; /* RPO with translation */ \
		} \
		if (region != CONFIG_XTENSA_CACHED_REGION) { \
			__asm__ volatile("wdtlb %0, %1; witlb %0, %1" \
					 :: "r"(attr), "r"(addr)); \
		} else { \
			__asm__ volatile("wdtlb %0, %1" \
					 :: "r"(attr), "r"(addr)); \
			__asm__ volatile("j 1f; .align 8; 1:"); \
			__asm__ volatile("witlb %0, %1; isync" \
					 :: "r"(attr), "r"(addr)); \
		} \
		addr += addrincr; \
	} while (0)

	/* Set the TLB attributes of all eight regions.  Expands _SET_ONE_TLB
	 * once for each of regions 0..7, starting at address 0 and stepping
	 * by 0x20000000 (512MB) per region.
	 */
	#define ARCH_XTENSA_SET_RPO_TLB() do { \
	register uint32_t addr = 0, addrincr = 0x20000000; \
	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7); \
	} while (0)

	#endif

	#ifdef __cplusplus
	} /* extern "C" */
	#endif

	#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */